Merge branch 'master' of https://github.com/rg3/youtube-dl
This commit is contained in:
commit
58214ece59
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.07*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.01**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.07**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2016.08.01
|
[debug] youtube-dl version 2016.08.07
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
2
AUTHORS
2
AUTHORS
@ -179,3 +179,5 @@ Jakub Adam Wieczorek
|
|||||||
Aleksandar Topuzović
|
Aleksandar Topuzović
|
||||||
Nehal Patel
|
Nehal Patel
|
||||||
Rob van Bekkum
|
Rob van Bekkum
|
||||||
|
Petr Zvoníček
|
||||||
|
Pratyush Singh
|
||||||
|
68
ChangeLog
68
ChangeLog
@ -1,3 +1,70 @@
|
|||||||
|
version <unreleased>
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [kuwo:singer] Fix extraction
|
||||||
|
* [aparat] Fix extraction
|
||||||
|
|
||||||
|
version 2016.08.07
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ Add support for TV Parental Guidelines ratings in parse_age_limit
|
||||||
|
+ Add decode_png (#9706)
|
||||||
|
+ Add support for partOfTVSeries in JSON-LD
|
||||||
|
* Lower master M3U8 manifest preference for better format sorting
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [discoverygo] Add extractor (#10245)
|
||||||
|
* [flipagram] Make JSON-LD extraction non fatal
|
||||||
|
* [generic] Make JSON-LD extraction non fatal
|
||||||
|
+ [bbc] Add support for morph embeds (#10239)
|
||||||
|
* [tnaflixnetworkbase] Improve title extraction
|
||||||
|
* [tnaflix] Fix metadata extraction (#10249)
|
||||||
|
* [fox] Fix theplatform release URL query
|
||||||
|
* [openload] Fix extraction (#9706)
|
||||||
|
* [bbc] Skip duplicate manifest URLs
|
||||||
|
* [bbc] Improve format code
|
||||||
|
+ [bbc] Add support for DASH and F4M
|
||||||
|
* [bbc] Improve format sorting and listing
|
||||||
|
* [bbc] Improve playlist extraction
|
||||||
|
+ [pokemon] Add extractor (#10093)
|
||||||
|
+ [condenast] Add fallback scenario for video info extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2016.08.06
|
||||||
|
|
||||||
|
Core
|
||||||
|
* Add support for JSON-LD root list entries (#10203)
|
||||||
|
* Improve unified_timestamp
|
||||||
|
* Lower preference of RTSP formats in generic sorting
|
||||||
|
+ Add support for multiple properties in _og_search_property
|
||||||
|
* Improve password hiding from verbose output
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [adultswim] Add support for trailers (#10235)
|
||||||
|
* [archiveorg] Improve extraction (#10219)
|
||||||
|
+ [jwplatform] Add support for playlists
|
||||||
|
+ [jwplatform] Add support for relative URLs
|
||||||
|
* [jwplatform] Improve audio detection
|
||||||
|
+ [tvplay] Capture and output native error message
|
||||||
|
+ [tvplay] Extract series metadata
|
||||||
|
+ [tvplay] Add support for subtitles (#10194)
|
||||||
|
* [tvp] Improve extraction (#7799)
|
||||||
|
* [cbslocal] Fix timestamp parsing (#10213)
|
||||||
|
+ [naver] Add support for subtitles (#8096)
|
||||||
|
* [naver] Improve extraction
|
||||||
|
* [condenast] Improve extraction
|
||||||
|
* [engadget] Relax URL regular expression
|
||||||
|
* [5min] Fix extraction
|
||||||
|
+ [nationalgeographic] Add support for Episode Guide
|
||||||
|
+ [kaltura] Add support for subtitles
|
||||||
|
* [kaltura] Optimize network requests
|
||||||
|
+ [vodplatform] Add extractor for vod-platform.net
|
||||||
|
- [gamekings] Remove extractor
|
||||||
|
* [limelight] Extract HTTP formats
|
||||||
|
* [ntvru] Fix extraction
|
||||||
|
+ [comedycentral] Re-add :tds and :thedailyshow shortnames
|
||||||
|
|
||||||
|
|
||||||
version 2016.08.01
|
version 2016.08.01
|
||||||
|
|
||||||
Fixed/improved extractors
|
Fixed/improved extractors
|
||||||
@ -7,6 +74,7 @@ Fixed/improved extractors
|
|||||||
- [safari] Relax regular expressions for URL matching (#10202)
|
- [safari] Relax regular expressions for URL matching (#10202)
|
||||||
- [cwtv] Add support for cwtvpr.com (#10196)
|
- [cwtv] Add support for cwtvpr.com (#10196)
|
||||||
|
|
||||||
|
|
||||||
version 2016.07.30
|
version 2016.07.30
|
||||||
|
|
||||||
Fixed/improved extractors
|
Fixed/improved extractors
|
||||||
|
@ -424,7 +424,7 @@ which means you can modify it, redistribute it or use it however you like.
|
|||||||
|
|
||||||
# CONFIGURATION
|
# CONFIGURATION
|
||||||
|
|
||||||
You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and OS X, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config/youtube-dl.conf`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
|
You can configure youtube-dl by placing any supported command line option in a configuration file. On Linux and OS X, the system-wide configuration file is located at `/etc/youtube-dl.conf` and the user-wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user-wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default the configuration file may not exist, so you may need to create it yourself.
|
||||||
|
|
||||||
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
|
||||||
```
|
```
|
||||||
@ -1196,7 +1196,7 @@ Make sure that someone has not already opened the issue you're trying to open. S
|
|||||||
|
|
||||||
### Why are existing options not enough?
|
### Why are existing options not enough?
|
||||||
|
|
||||||
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
|
||||||
|
|
||||||
### Is there enough context in your bug report?
|
### Is there enough context in your bug report?
|
||||||
|
|
||||||
|
@ -142,6 +142,7 @@
|
|||||||
- **CollegeRama**
|
- **CollegeRama**
|
||||||
- **ComCarCoff**
|
- **ComCarCoff**
|
||||||
- **ComedyCentral**
|
- **ComedyCentral**
|
||||||
|
- **ComedyCentralShortname**
|
||||||
- **ComedyCentralTV**
|
- **ComedyCentralTV**
|
||||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||||
- **Coub**
|
- **Coub**
|
||||||
@ -181,6 +182,7 @@
|
|||||||
- **DigitallySpeaking**
|
- **DigitallySpeaking**
|
||||||
- **Digiteka**
|
- **Digiteka**
|
||||||
- **Discovery**
|
- **Discovery**
|
||||||
|
- **DiscoveryGo**
|
||||||
- **Dotsub**
|
- **Dotsub**
|
||||||
- **DouyuTV**: 斗鱼
|
- **DouyuTV**: 斗鱼
|
||||||
- **DPlay**
|
- **DPlay**
|
||||||
@ -247,7 +249,6 @@
|
|||||||
- **FunnyOrDie**
|
- **FunnyOrDie**
|
||||||
- **Fusion**
|
- **Fusion**
|
||||||
- **GameInformer**
|
- **GameInformer**
|
||||||
- **Gamekings**
|
|
||||||
- **GameOne**
|
- **GameOne**
|
||||||
- **gameone:playlist**
|
- **gameone:playlist**
|
||||||
- **Gamersyde**
|
- **Gamersyde**
|
||||||
@ -415,7 +416,8 @@
|
|||||||
- **MyVidster**
|
- **MyVidster**
|
||||||
- **n-tv.de**
|
- **n-tv.de**
|
||||||
- **natgeo**
|
- **natgeo**
|
||||||
- **natgeo:channel**
|
- **natgeo:episodeguide**
|
||||||
|
- **natgeo:video**
|
||||||
- **Naver**
|
- **Naver**
|
||||||
- **NBA**
|
- **NBA**
|
||||||
- **NBC**
|
- **NBC**
|
||||||
@ -517,6 +519,7 @@
|
|||||||
- **plus.google**: Google Plus
|
- **plus.google**: Google Plus
|
||||||
- **pluzz.francetv.fr**
|
- **pluzz.francetv.fr**
|
||||||
- **podomatic**
|
- **podomatic**
|
||||||
|
- **Pokemon**
|
||||||
- **PolskieRadio**
|
- **PolskieRadio**
|
||||||
- **PornHd**
|
- **PornHd**
|
||||||
- **PornHub**: PornHub and Thumbzilla
|
- **PornHub**: PornHub and Thumbzilla
|
||||||
@ -726,6 +729,7 @@
|
|||||||
- **tvigle**: Интернет-телевидение Tvigle.ru
|
- **tvigle**: Интернет-телевидение Tvigle.ru
|
||||||
- **tvland.com**
|
- **tvland.com**
|
||||||
- **tvp**: Telewizja Polska
|
- **tvp**: Telewizja Polska
|
||||||
|
- **tvp:embed**: Telewizja Polska
|
||||||
- **tvp:series**
|
- **tvp:series**
|
||||||
- **TVPlay**: TV3Play and related services
|
- **TVPlay**: TV3Play and related services
|
||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
@ -805,6 +809,7 @@
|
|||||||
- **vk:wallpost**
|
- **vk:wallpost**
|
||||||
- **vlive**
|
- **vlive**
|
||||||
- **Vodlocker**
|
- **Vodlocker**
|
||||||
|
- **VODPlatform**
|
||||||
- **VoiceRepublic**
|
- **VoiceRepublic**
|
||||||
- **VoxMedia**
|
- **VoxMedia**
|
||||||
- **Vporn**
|
- **Vporn**
|
||||||
|
@ -42,6 +42,7 @@ from youtube_dl.utils import (
|
|||||||
ohdave_rsa_encrypt,
|
ohdave_rsa_encrypt,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_age_limit,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
parse_count,
|
parse_count,
|
||||||
@ -308,6 +309,7 @@ class TestUtil(unittest.TestCase):
|
|||||||
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
|
self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
|
||||||
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
|
||||||
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
|
||||||
|
self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
|
||||||
|
|
||||||
def test_determine_ext(self):
|
def test_determine_ext(self):
|
||||||
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
|
||||||
@ -431,6 +433,20 @@ class TestUtil(unittest.TestCase):
|
|||||||
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
|
||||||
'trailer.mp4')
|
'trailer.mp4')
|
||||||
|
|
||||||
|
def test_parse_age_limit(self):
|
||||||
|
self.assertEqual(parse_age_limit(None), None)
|
||||||
|
self.assertEqual(parse_age_limit(False), None)
|
||||||
|
self.assertEqual(parse_age_limit('invalid'), None)
|
||||||
|
self.assertEqual(parse_age_limit(0), 0)
|
||||||
|
self.assertEqual(parse_age_limit(18), 18)
|
||||||
|
self.assertEqual(parse_age_limit(21), 21)
|
||||||
|
self.assertEqual(parse_age_limit(22), None)
|
||||||
|
self.assertEqual(parse_age_limit('18'), 18)
|
||||||
|
self.assertEqual(parse_age_limit('18+'), 18)
|
||||||
|
self.assertEqual(parse_age_limit('PG-13'), 13)
|
||||||
|
self.assertEqual(parse_age_limit('TV-14'), 14)
|
||||||
|
self.assertEqual(parse_age_limit('TV-MA'), 17)
|
||||||
|
|
||||||
def test_parse_duration(self):
|
def test_parse_duration(self):
|
||||||
self.assertEqual(parse_duration(None), None)
|
self.assertEqual(parse_duration(None), None)
|
||||||
self.assertEqual(parse_duration(False), None)
|
self.assertEqual(parse_duration(False), None)
|
||||||
|
@ -249,7 +249,16 @@ class YoutubeDL(object):
|
|||||||
source_address: (Experimental) Client-side IP address to bind to.
|
source_address: (Experimental) Client-side IP address to bind to.
|
||||||
call_home: Boolean, true iff we are allowed to contact the
|
call_home: Boolean, true iff we are allowed to contact the
|
||||||
youtube-dl servers for debugging.
|
youtube-dl servers for debugging.
|
||||||
sleep_interval: Number of seconds to sleep before each download.
|
sleep_interval: Number of seconds to sleep before each download when
|
||||||
|
used alone or a lower bound of a range for randomized
|
||||||
|
sleep before each download (minimum possible number
|
||||||
|
of seconds to sleep) when used along with
|
||||||
|
max_sleep_interval.
|
||||||
|
max_sleep_interval:Upper bound of a range for randomized sleep before each
|
||||||
|
download (maximum possible number of seconds to sleep).
|
||||||
|
Must only be used along with sleep_interval.
|
||||||
|
Actual sleep time will be a random float from range
|
||||||
|
[sleep_interval; max_sleep_interval].
|
||||||
listformats: Print an overview of available video formats and exit.
|
listformats: Print an overview of available video formats and exit.
|
||||||
list_thumbnails: Print a table of all thumbnails and exit.
|
list_thumbnails: Print a table of all thumbnails and exit.
|
||||||
match_filter: A function that gets called with the info_dict of
|
match_filter: A function that gets called with the info_dict of
|
||||||
|
@ -145,6 +145,16 @@ def _real_main(argv=None):
|
|||||||
if numeric_limit is None:
|
if numeric_limit is None:
|
||||||
parser.error('invalid max_filesize specified')
|
parser.error('invalid max_filesize specified')
|
||||||
opts.max_filesize = numeric_limit
|
opts.max_filesize = numeric_limit
|
||||||
|
# Validate the sleep interval options: both bounds must be non-negative and
# the upper bound must not be smaller than the lower bound.
if opts.sleep_interval is not None:
    if opts.sleep_interval < 0:
        parser.error('sleep interval must be positive or 0')
if opts.max_sleep_interval is not None:
    if opts.max_sleep_interval < 0:
        parser.error('max sleep interval must be positive or 0')
    # An upper bound without a lower bound cannot be compared: ordering a
    # number against None raises TypeError on Python 3 and is silently
    # accepted on Python 2, so reject it explicitly.
    if opts.sleep_interval is None:
        parser.error('min sleep interval must be specified when max sleep interval is given')
    if opts.max_sleep_interval < opts.sleep_interval:
        parser.error('max sleep interval must be greater than or equal to min sleep interval')
else:
    # No upper bound given: the range collapses to the single fixed value.
    opts.max_sleep_interval = opts.sleep_interval
|
||||||
|
|
||||||
def parse_retries(retries):
|
def parse_retries(retries):
|
||||||
if retries in ('inf', 'infinite'):
|
if retries in ('inf', 'infinite'):
|
||||||
@ -370,6 +380,7 @@ def _real_main(argv=None):
|
|||||||
'source_address': opts.source_address,
|
'source_address': opts.source_address,
|
||||||
'call_home': opts.call_home,
|
'call_home': opts.call_home,
|
||||||
'sleep_interval': opts.sleep_interval,
|
'sleep_interval': opts.sleep_interval,
|
||||||
|
'max_sleep_interval': opts.max_sleep_interval,
|
||||||
'external_downloader': opts.external_downloader,
|
'external_downloader': opts.external_downloader,
|
||||||
'list_thumbnails': opts.list_thumbnails,
|
'list_thumbnails': opts.list_thumbnails,
|
||||||
'playlist_items': opts.playlist_items,
|
'playlist_items': opts.playlist_items,
|
||||||
|
@ -4,6 +4,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
|
import random
|
||||||
|
|
||||||
from ..compat import compat_os_name
|
from ..compat import compat_os_name
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -342,8 +343,11 @@ class FileDownloader(object):
|
|||||||
})
|
})
|
||||||
return True
|
return True
|
||||||
|
|
||||||
sleep_interval = self.params.get('sleep_interval')
|
min_sleep_interval = self.params.get('sleep_interval')
|
||||||
if sleep_interval:
|
if min_sleep_interval:
|
||||||
|
# dict.get's default only applies when the key is missing, not when it is
# present with a None value, so fall back to the lower bound explicitly.
max_sleep_interval = self.params.get('max_sleep_interval') or min_sleep_interval
sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
|
||||||
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
self.to_screen('[download] Sleeping %s seconds...' % sleep_interval)
|
||||||
time.sleep(sleep_interval)
|
time.sleep(sleep_interval)
|
||||||
|
|
||||||
|
@ -83,6 +83,20 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# heroMetadata.trailer
|
||||||
|
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Decker - Inside Decker: A New Hero',
|
||||||
|
'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
|
||||||
|
'duration': 249.008,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -133,20 +147,26 @@ class AdultSwimIE(InfoExtractor):
|
|||||||
if video_info is None:
|
if video_info is None:
|
||||||
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
|
||||||
video_info = bootstrapped_data['slugged_video']
|
video_info = bootstrapped_data['slugged_video']
|
||||||
else:
|
if not video_info:
|
||||||
raise ExtractorError('Unable to find video info')
|
# 'heroMetadata' or 'trailer' may be missing or null; substitute empty dicts
# so the lookup yields None instead of raising AttributeError.
video_info = ((bootstrapped_data.get('heroMetadata') or {}).get('trailer') or {}).get('video')
|
||||||
|
if not video_info:
|
||||||
|
raise ExtractorError('Unable to find video info')
|
||||||
|
|
||||||
show = bootstrapped_data['show']
|
show = bootstrapped_data['show']
|
||||||
show_title = show['title']
|
show_title = show['title']
|
||||||
stream = video_info.get('stream')
|
stream = video_info.get('stream')
|
||||||
clips = [stream] if stream else video_info.get('clips')
|
if stream and stream.get('videoPlaybackID'):
|
||||||
if not clips:
|
segment_ids = [stream['videoPlaybackID']]
|
||||||
|
elif video_info.get('clips'):
|
||||||
|
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
|
||||||
|
elif video_info.get('videoPlaybackID'):
|
||||||
|
segment_ids = [video_info['videoPlaybackID']]
|
||||||
|
else:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'This video is only available via cable service provider subscription that'
|
'This video is only available via cable service provider subscription that'
|
||||||
' is not currently supported. You may want to use --cookies.'
|
' is not currently supported. You may want to use --cookies.'
|
||||||
if video_info.get('auth') is True else 'Unable to find stream or clips',
|
if video_info.get('auth') is True else 'Unable to find stream or clips',
|
||||||
expected=True)
|
expected=True)
|
||||||
segment_ids = [clip['videoPlaybackID'] for clip in clips]
|
|
||||||
|
|
||||||
episode_id = video_info['id']
|
episode_id = video_info['id']
|
||||||
episode_title = video_info['title']
|
episode_title = video_info['title']
|
||||||
|
@ -123,6 +123,10 @@ class AolFeaturesIE(InfoExtractor):
|
|||||||
'title': 'What To Watch - February 17, 2016',
|
'title': 'What To Watch - February 17, 2016',
|
||||||
},
|
},
|
||||||
'add_ie': ['FiveMin'],
|
'add_ie': ['FiveMin'],
|
||||||
|
'params': {
|
||||||
|
# encrypted m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,8 +1,6 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -15,7 +13,7 @@ class AparatIE(InfoExtractor):
|
|||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.aparat.com/v/wP8On',
|
'url': 'http://www.aparat.com/v/wP8On',
|
||||||
'md5': '6714e0af7e0d875c5a39c4dc4ab46ad1',
|
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'wP8On',
|
'id': 'wP8On',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -31,13 +29,13 @@ class AparatIE(InfoExtractor):
|
|||||||
# Note: There is an easier-to-parse configuration at
|
# Note: There is an easier-to-parse configuration at
|
||||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||||
# but the URL in there does not work
|
# but the URL in there does not work
|
||||||
embed_url = ('http://www.aparat.com/video/video/embed/videohash/' +
|
embed_url = 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id
|
||||||
video_id + '/vt/frame')
|
|
||||||
webpage = self._download_webpage(embed_url, video_id)
|
webpage = self._download_webpage(embed_url, video_id)
|
||||||
|
|
||||||
video_urls = [video_url.replace('\\/', '/') for video_url in re.findall(
|
file_list = self._parse_json(self._search_regex(
|
||||||
r'(?:fileList\[[0-9]+\]\s*=|"file"\s*:)\s*"([^"]+)"', webpage)]
|
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, 'file list'), video_id)
|
||||||
for i, video_url in enumerate(video_urls):
|
for i, item in enumerate(file_list[0]):
|
||||||
|
video_url = item['file']
|
||||||
req = HEADRequest(video_url)
|
req = HEADRequest(video_url)
|
||||||
res = self._request_webpage(
|
res = self._request_webpage(
|
||||||
req, video_id, note='Testing video URL %d' % i, errnote=False)
|
req, video_id, note='Testing video URL %d' % i, errnote=False)
|
||||||
|
@ -1,67 +1,65 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .jwplatform import JWPlatformBaseIE
|
||||||
from ..utils import unified_strdate
|
from ..utils import (
|
||||||
|
unified_strdate,
|
||||||
|
clean_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ArchiveOrgIE(InfoExtractor):
|
class ArchiveOrgIE(JWPlatformBaseIE):
|
||||||
IE_NAME = 'archive.org'
|
IE_NAME = 'archive.org'
|
||||||
IE_DESC = 'archive.org videos'
|
IE_DESC = 'archive.org videos'
|
||||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/details/(?P<id>[^?/]+)(?:[?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
'ext': 'ogv',
|
'ext': 'ogg',
|
||||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||||
'description': 'md5:1780b464abaca9991d8968c877bb53ed',
|
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
||||||
'upload_date': '19681210',
|
'upload_date': '19681210',
|
||||||
'uploader': 'SRI International'
|
'uploader': 'SRI International'
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://archive.org/details/Cops1922',
|
'url': 'https://archive.org/details/Cops1922',
|
||||||
'md5': '18f2a19e6d89af8425671da1cf3d4e04',
|
'md5': 'bc73c8ab3838b5a8fc6c6651fa7b58ba',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'Cops1922',
|
'id': 'Cops1922',
|
||||||
'ext': 'ogv',
|
'ext': 'mp4',
|
||||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||||
'description': 'md5:70f72ee70882f713d4578725461ffcc3',
|
'description': 'md5:b4544662605877edd99df22f9620d858',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://archive.org/embed/' + video_id, video_id)
|
||||||
|
jwplayer_playlist = self._parse_json(self._search_regex(
|
||||||
|
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\);",
|
||||||
|
webpage, 'jwplayer playlist'), video_id)
|
||||||
|
info = self._parse_jwplayer_data(
|
||||||
|
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||||
|
|
||||||
json_url = url + ('&' if '?' in url else '?') + 'output=json'
|
def get_optional(metadata, field):
|
||||||
data = self._download_json(json_url, video_id)
|
return metadata.get(field, [None])[0]
|
||||||
|
|
||||||
def get_optional(data_dict, field):
|
metadata = self._download_json(
|
||||||
return data_dict['metadata'].get(field, [None])[0]
|
'http://archive.org/details/' + video_id, video_id, query={
|
||||||
|
'output': 'json',
|
||||||
title = get_optional(data, 'title')
|
})['metadata']
|
||||||
description = get_optional(data, 'description')
|
info.update({
|
||||||
uploader = get_optional(data, 'creator')
|
'title': get_optional(metadata, 'title') or info.get('title'),
|
||||||
upload_date = unified_strdate(get_optional(data, 'date'))
|
'description': clean_html(get_optional(metadata, 'description')),
|
||||||
|
})
|
||||||
formats = [
|
if info.get('_type') != 'playlist':
|
||||||
{
|
info.update({
|
||||||
'format': fdata['format'],
|
'uploader': get_optional(metadata, 'creator'),
|
||||||
'url': 'http://' + data['server'] + data['dir'] + fn,
|
'upload_date': unified_strdate(get_optional(metadata, 'date')),
|
||||||
'file_size': int(fdata['size']),
|
})
|
||||||
}
|
return info
|
||||||
for fn, fdata in data['files'].items()
|
|
||||||
if 'Video' in fdata['format']]
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'video',
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'description': description,
|
|
||||||
'uploader': uploader,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'thumbnail': data.get('misc', {}).get('image'),
|
|
||||||
}
|
|
||||||
|
@ -5,11 +5,13 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
@ -229,51 +231,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
|
||||||
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
return [ref.get('href') for ref in asx.findall('./Entry/ref')]
|
||||||
|
|
||||||
def _extract_connection(self, connection, programme_id):
|
|
||||||
formats = []
|
|
||||||
kind = connection.get('kind')
|
|
||||||
protocol = connection.get('protocol')
|
|
||||||
supplier = connection.get('supplier')
|
|
||||||
if protocol == 'http':
|
|
||||||
href = connection.get('href')
|
|
||||||
transfer_format = connection.get('transferFormat')
|
|
||||||
# ASX playlist
|
|
||||||
if supplier == 'asx':
|
|
||||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
|
||||||
formats.append({
|
|
||||||
'url': ref,
|
|
||||||
'format_id': 'ref%s_%s' % (i, supplier),
|
|
||||||
})
|
|
||||||
# Skip DASH until supported
|
|
||||||
elif transfer_format == 'dash':
|
|
||||||
pass
|
|
||||||
elif transfer_format == 'hls':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id=supplier, fatal=False))
|
|
||||||
# Direct link
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': href,
|
|
||||||
'format_id': supplier or kind or protocol,
|
|
||||||
})
|
|
||||||
elif protocol == 'rtmp':
|
|
||||||
application = connection.get('application', 'ondemand')
|
|
||||||
auth_string = connection.get('authString')
|
|
||||||
identifier = connection.get('identifier')
|
|
||||||
server = connection.get('server')
|
|
||||||
formats.append({
|
|
||||||
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
|
||||||
'play_path': identifier,
|
|
||||||
'app': '%s?%s' % (application, auth_string),
|
|
||||||
'page_url': 'http://www.bbc.co.uk',
|
|
||||||
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
|
||||||
'rtmp_live': False,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': supplier,
|
|
||||||
})
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_items(self, playlist):
|
def _extract_items(self, playlist):
|
||||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||||
|
|
||||||
@ -294,46 +251,6 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
def _extract_connections(self, media):
|
def _extract_connections(self, media):
|
||||||
return self._findall_ns(media, './{%s}connection')
|
return self._findall_ns(media, './{%s}connection')
|
||||||
|
|
||||||
def _extract_video(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
vbr = int_or_none(media.get('bitrate'))
|
|
||||||
vcodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
width = int_or_none(media.get('width'))
|
|
||||||
height = int_or_none(media.get('height'))
|
|
||||||
file_size = int_or_none(media.get('media_file_size'))
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'width': width,
|
|
||||||
'height': height,
|
|
||||||
'vbr': vbr,
|
|
||||||
'vcodec': vcodec,
|
|
||||||
'filesize': file_size,
|
|
||||||
})
|
|
||||||
if service:
|
|
||||||
format['format_id'] = '%s_%s' % (service, format['format_id'])
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _extract_audio(self, media, programme_id):
|
|
||||||
formats = []
|
|
||||||
abr = int_or_none(media.get('bitrate'))
|
|
||||||
acodec = media.get('encoding')
|
|
||||||
service = media.get('service')
|
|
||||||
for connection in self._extract_connections(media):
|
|
||||||
conn_formats = self._extract_connection(connection, programme_id)
|
|
||||||
for format in conn_formats:
|
|
||||||
format.update({
|
|
||||||
'format_id': '%s_%s' % (service, format['format_id']),
|
|
||||||
'abr': abr,
|
|
||||||
'acodec': acodec,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
formats.extend(conn_formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _get_subtitles(self, media, programme_id):
|
def _get_subtitles(self, media, programme_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for connection in self._extract_connections(media):
|
for connection in self._extract_connections(media):
|
||||||
@ -379,13 +296,87 @@ class BBCCoUkIE(InfoExtractor):
|
|||||||
def _process_media_selector(self, media_selection, programme_id):
|
def _process_media_selector(self, media_selection, programme_id):
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = None
|
subtitles = None
|
||||||
|
urls = []
|
||||||
|
|
||||||
for media in self._extract_medias(media_selection):
|
for media in self._extract_medias(media_selection):
|
||||||
kind = media.get('kind')
|
kind = media.get('kind')
|
||||||
if kind == 'audio':
|
if kind in ('video', 'audio'):
|
||||||
formats.extend(self._extract_audio(media, programme_id))
|
bitrate = int_or_none(media.get('bitrate'))
|
||||||
elif kind == 'video':
|
encoding = media.get('encoding')
|
||||||
formats.extend(self._extract_video(media, programme_id))
|
service = media.get('service')
|
||||||
|
width = int_or_none(media.get('width'))
|
||||||
|
height = int_or_none(media.get('height'))
|
||||||
|
file_size = int_or_none(media.get('media_file_size'))
|
||||||
|
for connection in self._extract_connections(media):
|
||||||
|
href = connection.get('href')
|
||||||
|
if href in urls:
|
||||||
|
continue
|
||||||
|
if href:
|
||||||
|
urls.append(href)
|
||||||
|
conn_kind = connection.get('kind')
|
||||||
|
protocol = connection.get('protocol')
|
||||||
|
supplier = connection.get('supplier')
|
||||||
|
transfer_format = connection.get('transferFormat')
|
||||||
|
format_id = supplier or conn_kind or protocol
|
||||||
|
if service:
|
||||||
|
format_id = '%s_%s' % (service, format_id)
|
||||||
|
# ASX playlist
|
||||||
|
if supplier == 'asx':
|
||||||
|
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||||
|
formats.append({
|
||||||
|
'url': ref,
|
||||||
|
'format_id': 'ref%s_%s' % (i, format_id),
|
||||||
|
})
|
||||||
|
elif transfer_format == 'dash':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
href, programme_id, mpd_id=format_id, fatal=False))
|
||||||
|
elif transfer_format == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False))
|
||||||
|
elif transfer_format == 'hds':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
|
else:
|
||||||
|
if not service and not supplier and bitrate:
|
||||||
|
format_id += '-%d' % bitrate
|
||||||
|
fmt = {
|
||||||
|
'format_id': format_id,
|
||||||
|
'filesize': file_size,
|
||||||
|
}
|
||||||
|
if kind == 'video':
|
||||||
|
fmt.update({
|
||||||
|
'width': width,
|
||||||
|
'height': height,
|
||||||
|
'vbr': bitrate,
|
||||||
|
'vcodec': encoding,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
fmt.update({
|
||||||
|
'abr': bitrate,
|
||||||
|
'acodec': encoding,
|
||||||
|
'vcodec': 'none',
|
||||||
|
})
|
||||||
|
if protocol == 'http':
|
||||||
|
# Direct link
|
||||||
|
fmt.update({
|
||||||
|
'url': href,
|
||||||
|
})
|
||||||
|
elif protocol == 'rtmp':
|
||||||
|
application = connection.get('application', 'ondemand')
|
||||||
|
auth_string = connection.get('authString')
|
||||||
|
identifier = connection.get('identifier')
|
||||||
|
server = connection.get('server')
|
||||||
|
fmt.update({
|
||||||
|
'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
|
||||||
|
'play_path': identifier,
|
||||||
|
'app': '%s?%s' % (application, auth_string),
|
||||||
|
'page_url': 'http://www.bbc.co.uk',
|
||||||
|
'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
|
||||||
|
'rtmp_live': False,
|
||||||
|
'ext': 'flv',
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
elif kind == 'captions':
|
elif kind == 'captions':
|
||||||
subtitles = self.extract_subtitles(media, programme_id)
|
subtitles = self.extract_subtitles(media, programme_id)
|
||||||
return formats, subtitles
|
return formats, subtitles
|
||||||
@ -589,7 +580,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '150615_telabyad_kentin_cogu',
|
'id': '150615_telabyad_kentin_cogu',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Tel Abyad'da IŞİD bayrağı indirildi YPG bayrağı çekildi",
|
'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
|
||||||
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
|
||||||
'timestamp': 1434397334,
|
'timestamp': 1434397334,
|
||||||
'upload_date': '20150615',
|
'upload_date': '20150615',
|
||||||
@ -654,6 +645,23 @@ class BBCIE(BBCCoUkIE):
|
|||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# single video embedded with Morph
|
||||||
|
'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p041vhd0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Nigeria v Japan - Men's First Round",
|
||||||
|
'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
|
||||||
|
'duration': 7980,
|
||||||
|
'uploader': 'BBC Sport',
|
||||||
|
'uploader_id': 'bbc_sport',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'skip': 'Georestricted to UK',
|
||||||
}, {
|
}, {
|
||||||
# single video with playlist.sxml URL in playlist param
|
# single video with playlist.sxml URL in playlist param
|
||||||
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
'url': 'http://www.bbc.com/sport/0/football/33653409',
|
||||||
@ -751,7 +759,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
|
json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
|
||||||
timestamp = json_ld_info.get('timestamp')
|
timestamp = json_ld_info.get('timestamp')
|
||||||
|
|
||||||
playlist_title = json_ld_info.get('title')
|
playlist_title = json_ld_info.get('title')
|
||||||
@ -820,13 +828,19 @@ class BBCIE(BBCCoUkIE):
|
|||||||
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
# http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
|
||||||
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
playlist = data_playable.get('otherSettings', {}).get('playlist', {})
|
||||||
if playlist:
|
if playlist:
|
||||||
for key in ('progressiveDownload', 'streaming'):
|
entry = None
|
||||||
|
for key in ('streaming', 'progressiveDownload'):
|
||||||
playlist_url = playlist.get('%sUrl' % key)
|
playlist_url = playlist.get('%sUrl' % key)
|
||||||
if not playlist_url:
|
if not playlist_url:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
entries.append(self._extract_from_playlist_sxml(
|
info = self._extract_from_playlist_sxml(
|
||||||
playlist_url, playlist_id, timestamp))
|
playlist_url, playlist_id, timestamp)
|
||||||
|
if not entry:
|
||||||
|
entry = info
|
||||||
|
else:
|
||||||
|
entry['title'] = info['title']
|
||||||
|
entry['formats'].extend(info['formats'])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# One playlist URL may fail with 500 while, at the same time,
|
# One playlist URL may fail with 500 while, at the same time,
|
||||||
# the other one may work fine (e.g.
|
# the other one may work fine (e.g.
|
||||||
@ -834,6 +848,9 @@ class BBCIE(BBCCoUkIE):
|
|||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
|
||||||
continue
|
continue
|
||||||
raise
|
raise
|
||||||
|
if entry:
|
||||||
|
self._sort_formats(entry['formats'])
|
||||||
|
entries.append(entry)
|
||||||
|
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
|
||||||
@ -866,6 +883,50 @@ class BBCIE(BBCCoUkIE):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||||
|
# Several setPayload calls may be present but the video
|
||||||
|
# seems to be always related to the first one
|
||||||
|
morph_payload = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
|
||||||
|
webpage, 'morph payload', default='{}'),
|
||||||
|
playlist_id, fatal=False)
|
||||||
|
if morph_payload:
|
||||||
|
components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
|
||||||
|
for component in components:
|
||||||
|
if not isinstance(component, dict):
|
||||||
|
continue
|
||||||
|
lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
|
||||||
|
if not lead_media:
|
||||||
|
continue
|
||||||
|
identifiers = lead_media.get('identifiers')
|
||||||
|
if not identifiers or not isinstance(identifiers, dict):
|
||||||
|
continue
|
||||||
|
programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
|
||||||
|
if not programme_id:
|
||||||
|
continue
|
||||||
|
title = lead_media.get('title') or self._og_search_title(webpage)
|
||||||
|
formats, subtitles = self._download_media_selector(programme_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
description = lead_media.get('summary')
|
||||||
|
uploader = lead_media.get('masterBrand')
|
||||||
|
uploader_id = lead_media.get('mid')
|
||||||
|
duration = None
|
||||||
|
duration_d = lead_media.get('duration')
|
||||||
|
if isinstance(duration_d, dict):
|
||||||
|
duration = parse_duration(dict_get(
|
||||||
|
duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
|
||||||
|
return {
|
||||||
|
'id': programme_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
|
|
||||||
def extract_all(pattern):
|
def extract_all(pattern):
|
||||||
return list(filter(None, map(
|
return list(filter(None, map(
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
@ -883,7 +944,7 @@ class BBCIE(BBCCoUkIE):
|
|||||||
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
|
||||||
if entries:
|
if entries:
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
[self.url_result(entry, 'BBCCoUk') for entry in entries],
|
[self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
|
||||||
playlist_id, playlist_title, playlist_description)
|
playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
# Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
|
||||||
|
@ -25,13 +25,13 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.bilibili.tv/video/av1074402/',
|
'url': 'http://www.bilibili.tv/video/av1074402/',
|
||||||
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
|
'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1554319',
|
'id': '1554319',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '【金坷垃】金泡沫',
|
'title': '【金坷垃】金泡沫',
|
||||||
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
|
||||||
'duration': 308.067,
|
'duration': 308.315,
|
||||||
'timestamp': 1398012660,
|
'timestamp': 1398012660,
|
||||||
'upload_date': '20140420',
|
'upload_date': '20140420',
|
||||||
'thumbnail': 're:^https?://.+\.jpg',
|
'thumbnail': 're:^https?://.+\.jpg',
|
||||||
@ -41,73 +41,33 @@ class BiliBiliIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bilibili.com/video/av1041170/',
|
'url': 'http://www.bilibili.com/video/av1041170/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1041170',
|
'id': '1507019',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '【BD1080P】刀语【诸神&异域】',
|
'title': '【BD1080P】刀语【诸神&异域】',
|
||||||
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~',
|
||||||
|
'timestamp': 1396530060,
|
||||||
|
'upload_date': '20140403',
|
||||||
|
'uploader': '枫叶逝去',
|
||||||
|
'uploader_id': '520116',
|
||||||
},
|
},
|
||||||
'playlist_count': 9,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bilibili.com/video/av4808130/',
|
'url': 'http://www.bilibili.com/video/av4808130/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4808130',
|
'id': '7802182',
|
||||||
|
'ext': 'mp4',
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
'title': '【长篇】哆啦A梦443【钉铛】',
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
||||||
|
'timestamp': 1464564180,
|
||||||
|
'upload_date': '20160529',
|
||||||
|
'uploader': '喜欢拉面',
|
||||||
|
'uploader_id': '151066',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
|
||||||
'md5': '55cdadedf3254caaa0d5d27cf20a8f9c',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130_part1',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
'timestamp': 1464564180,
|
|
||||||
'upload_date': '20160529',
|
|
||||||
'uploader': '喜欢拉面',
|
|
||||||
'uploader_id': '151066',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'md5': '926f9f67d0c482091872fbd8eca7ea3d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130_part2',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
'timestamp': 1464564180,
|
|
||||||
'upload_date': '20160529',
|
|
||||||
'uploader': '喜欢拉面',
|
|
||||||
'uploader_id': '151066',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'md5': '4b7b225b968402d7c32348c646f1fd83',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130_part3',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
'timestamp': 1464564180,
|
|
||||||
'upload_date': '20160529',
|
|
||||||
'uploader': '喜欢拉面',
|
|
||||||
'uploader_id': '151066',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'md5': '7b795e214166501e9141139eea236e91',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4808130_part4',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': '【长篇】哆啦A梦443【钉铛】',
|
|
||||||
'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929',
|
|
||||||
'timestamp': 1464564180,
|
|
||||||
'upload_date': '20160529',
|
|
||||||
'uploader': '喜欢拉面',
|
|
||||||
'uploader_id': '151066',
|
|
||||||
},
|
|
||||||
}],
|
|
||||||
}, {
|
}, {
|
||||||
# Missing upload time
|
# Missing upload time
|
||||||
'url': 'http://www.bilibili.com/video/av1867637/',
|
'url': 'http://www.bilibili.com/video/av1867637/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2880301',
|
'id': '2880301',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】',
|
||||||
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】',
|
||||||
'uploader': '黑夜为猫',
|
'uploader': '黑夜为猫',
|
||||||
|
@ -24,7 +24,8 @@ class BIQLEIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Ребенок в шоке от автоматической мойки',
|
'title': 'Ребенок в шоке от автоматической мойки',
|
||||||
'uploader': 'Dmitry Kotov',
|
'uploader': 'Dmitry Kotov',
|
||||||
}
|
},
|
||||||
|
'skip': ' This video was marked as adult. Embedding adult videos on external sites is prohibited.',
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -1,12 +1,10 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import calendar
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
from .anvato import AnvatoIE
|
from .anvato import AnvatoIE
|
||||||
from .sendtonews import SendtoNewsIE
|
from .sendtonews import SendtoNewsIE
|
||||||
from ..compat import compat_urlparse
|
from ..compat import compat_urlparse
|
||||||
|
from ..utils import unified_timestamp
|
||||||
|
|
||||||
|
|
||||||
class CBSLocalIE(AnvatoIE):
|
class CBSLocalIE(AnvatoIE):
|
||||||
@ -71,10 +69,7 @@ class CBSLocalIE(AnvatoIE):
|
|||||||
|
|
||||||
time_str = self._html_search_regex(
|
time_str = self._html_search_regex(
|
||||||
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
|
||||||
timestamp = None
|
timestamp = unified_timestamp(time_str)
|
||||||
if time_str:
|
|
||||||
timestamp = calendar.timegm(datetime.datetime.strptime(
|
|
||||||
time_str, '%b %d, %Y %I:%M %p').timetuple())
|
|
||||||
|
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
|
@ -17,7 +17,8 @@ class ChaturbateIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'skip': 'Room is offline',
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://en.chaturbate.com/siswet19/',
|
'url': 'https://en.chaturbate.com/siswet19/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -816,11 +816,14 @@ class InfoExtractor(object):
|
|||||||
json_ld = self._search_regex(
|
json_ld = self._search_regex(
|
||||||
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>',
|
||||||
html, 'JSON-LD', group='json_ld', **kwargs)
|
html, 'JSON-LD', group='json_ld', **kwargs)
|
||||||
|
default = kwargs.get('default', NO_DEFAULT)
|
||||||
if not json_ld:
|
if not json_ld:
|
||||||
return {}
|
return default if default is not NO_DEFAULT else {}
|
||||||
return self._json_ld(
|
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||||
json_ld, video_id, fatal=kwargs.get('fatal', True),
|
# At the same time `default` may be passed that assumes `fatal=False`
|
||||||
expected_type=expected_type)
|
# for _search_regex. Let's simulate the same behavior here as well.
|
||||||
|
fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
|
||||||
|
return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
|
||||||
|
|
||||||
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
|
||||||
if isinstance(json_ld, compat_str):
|
if isinstance(json_ld, compat_str):
|
||||||
@ -828,41 +831,47 @@ class InfoExtractor(object):
|
|||||||
if not json_ld:
|
if not json_ld:
|
||||||
return {}
|
return {}
|
||||||
info = {}
|
info = {}
|
||||||
if json_ld.get('@context') == 'http://schema.org':
|
if not isinstance(json_ld, (list, tuple, dict)):
|
||||||
item_type = json_ld.get('@type')
|
return info
|
||||||
if expected_type is not None and expected_type != item_type:
|
if isinstance(json_ld, dict):
|
||||||
return info
|
json_ld = [json_ld]
|
||||||
if item_type == 'TVEpisode':
|
for e in json_ld:
|
||||||
info.update({
|
if e.get('@context') == 'http://schema.org':
|
||||||
'episode': unescapeHTML(json_ld.get('name')),
|
item_type = e.get('@type')
|
||||||
'episode_number': int_or_none(json_ld.get('episodeNumber')),
|
if expected_type is not None and expected_type != item_type:
|
||||||
'description': unescapeHTML(json_ld.get('description')),
|
return info
|
||||||
})
|
if item_type == 'TVEpisode':
|
||||||
part_of_season = json_ld.get('partOfSeason')
|
info.update({
|
||||||
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
'episode': unescapeHTML(e.get('name')),
|
||||||
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
'episode_number': int_or_none(e.get('episodeNumber')),
|
||||||
part_of_series = json_ld.get('partOfSeries')
|
'description': unescapeHTML(e.get('description')),
|
||||||
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
})
|
||||||
info['series'] = unescapeHTML(part_of_series.get('name'))
|
part_of_season = e.get('partOfSeason')
|
||||||
elif item_type == 'Article':
|
if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
|
||||||
info.update({
|
info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
|
||||||
'timestamp': parse_iso8601(json_ld.get('datePublished')),
|
part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
|
||||||
'title': unescapeHTML(json_ld.get('headline')),
|
if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
|
||||||
'description': unescapeHTML(json_ld.get('articleBody')),
|
info['series'] = unescapeHTML(part_of_series.get('name'))
|
||||||
})
|
elif item_type == 'Article':
|
||||||
elif item_type == 'VideoObject':
|
info.update({
|
||||||
info.update({
|
'timestamp': parse_iso8601(e.get('datePublished')),
|
||||||
'url': json_ld.get('contentUrl'),
|
'title': unescapeHTML(e.get('headline')),
|
||||||
'title': unescapeHTML(json_ld.get('name')),
|
'description': unescapeHTML(e.get('articleBody')),
|
||||||
'description': unescapeHTML(json_ld.get('description')),
|
})
|
||||||
'thumbnail': json_ld.get('thumbnailUrl'),
|
elif item_type == 'VideoObject':
|
||||||
'duration': parse_duration(json_ld.get('duration')),
|
info.update({
|
||||||
'timestamp': unified_timestamp(json_ld.get('uploadDate')),
|
'url': e.get('contentUrl'),
|
||||||
'filesize': float_or_none(json_ld.get('contentSize')),
|
'title': unescapeHTML(e.get('name')),
|
||||||
'tbr': int_or_none(json_ld.get('bitrate')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
'width': int_or_none(json_ld.get('width')),
|
'thumbnail': e.get('thumbnailUrl'),
|
||||||
'height': int_or_none(json_ld.get('height')),
|
'duration': parse_duration(e.get('duration')),
|
||||||
})
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
|
'filesize': float_or_none(e.get('contentSize')),
|
||||||
|
'tbr': int_or_none(e.get('bitrate')),
|
||||||
|
'width': int_or_none(e.get('width')),
|
||||||
|
'height': int_or_none(e.get('height')),
|
||||||
|
})
|
||||||
|
break
|
||||||
return dict((k, v) for k, v in info.items() if v is not None)
|
return dict((k, v) for k, v in info.items() if v is not None)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -916,7 +925,8 @@ class InfoExtractor(object):
|
|||||||
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
|
if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
|
||||||
preference -= 0.5
|
preference -= 0.5
|
||||||
|
|
||||||
proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1
|
protocol = f.get('protocol') or determine_protocol(f)
|
||||||
|
proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
|
||||||
|
|
||||||
if f.get('vcodec') == 'none': # audio only
|
if f.get('vcodec') == 'none': # audio only
|
||||||
preference -= 50
|
preference -= 50
|
||||||
@ -1133,7 +1143,7 @@ class InfoExtractor(object):
|
|||||||
'url': m3u8_url,
|
'url': m3u8_url,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'protocol': 'm3u8',
|
'protocol': 'm3u8',
|
||||||
'preference': preference - 1 if preference else -1,
|
'preference': preference - 100 if preference else -100,
|
||||||
'resolution': 'multiple',
|
'resolution': 'multiple',
|
||||||
'format_note': 'Quality selection URL',
|
'format_note': 'Quality selection URL',
|
||||||
}
|
}
|
||||||
|
@ -5,13 +5,17 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
orderedSet,
|
orderedSet,
|
||||||
remove_end,
|
remove_end,
|
||||||
|
extract_attributes,
|
||||||
|
mimetype2ext,
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -58,6 +62,9 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '3D Printed Speakers Lit With LED',
|
'title': '3D Printed Speakers Lit With LED',
|
||||||
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
'description': 'Check out these beautiful 3D printed LED speakers. You can\'t actually buy them, but LumiGeek is working on a board that will let you make you\'re own.',
|
||||||
|
'uploader': 'wired',
|
||||||
|
'upload_date': '20130314',
|
||||||
|
'timestamp': 1363219200,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# JS embed
|
# JS embed
|
||||||
@ -67,70 +74,93 @@ class CondeNastIE(InfoExtractor):
|
|||||||
'id': '55f9cf8b61646d1acf00000c',
|
'id': '55f9cf8b61646d1acf00000c',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||||
|
'uploader': 'arstechnica',
|
||||||
|
'upload_date': '20150916',
|
||||||
|
'timestamp': 1442434955,
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_series(self, url, webpage):
|
def _extract_series(self, url, webpage):
|
||||||
title = self._html_search_regex(r'<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
title = self._html_search_regex(
|
||||||
webpage, 'series title', flags=re.DOTALL)
|
r'(?s)<div class="cne-series-info">.*?<h1>(.+?)</h1>',
|
||||||
|
webpage, 'series title')
|
||||||
url_object = compat_urllib_parse_urlparse(url)
|
url_object = compat_urllib_parse_urlparse(url)
|
||||||
base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
|
base_url = '%s://%s' % (url_object.scheme, url_object.netloc)
|
||||||
m_paths = re.finditer(r'<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]',
|
m_paths = re.finditer(
|
||||||
webpage, flags=re.DOTALL)
|
r'(?s)<p class="cne-thumb-title">.*?<a href="(/watch/.+?)["\?]', webpage)
|
||||||
paths = orderedSet(m.group(1) for m in m_paths)
|
paths = orderedSet(m.group(1) for m in m_paths)
|
||||||
build_url = lambda path: compat_urlparse.urljoin(base_url, path)
|
build_url = lambda path: compat_urlparse.urljoin(base_url, path)
|
||||||
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
entries = [self.url_result(build_url(path), 'CondeNast') for path in paths]
|
||||||
return self.playlist_result(entries, playlist_title=title)
|
return self.playlist_result(entries, playlist_title=title)
|
||||||
|
|
||||||
def _extract_video(self, webpage, url_type):
|
def _extract_video(self, webpage, url_type):
|
||||||
if url_type != 'embed':
|
query = {}
|
||||||
description = self._html_search_regex(
|
params = self._search_regex(
|
||||||
[
|
r'(?s)var params = {(.+?)}[;,]', webpage, 'player params', default=None)
|
||||||
r'<div class="cne-video-description">(.+?)</div>',
|
if params:
|
||||||
r'<div class="video-post-content">(.+?)</div>',
|
query.update({
|
||||||
],
|
'videoId': self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id'),
|
||||||
webpage, 'description', fatal=False, flags=re.DOTALL)
|
'playerId': self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id'),
|
||||||
|
'target': self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target'),
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
description = None
|
params = extract_attributes(self._search_regex(
|
||||||
params = self._search_regex(r'var params = {(.+?)}[;,]', webpage,
|
r'(<[^>]+data-js="video-player"[^>]+>)',
|
||||||
'player params', flags=re.DOTALL)
|
webpage, 'player params element'))
|
||||||
video_id = self._search_regex(r'videoId: [\'"](.+?)[\'"]', params, 'video id')
|
query.update({
|
||||||
player_id = self._search_regex(r'playerId: [\'"](.+?)[\'"]', params, 'player id')
|
'videoId': params['data-video'],
|
||||||
target = self._search_regex(r'target: [\'"](.+?)[\'"]', params, 'target')
|
'playerId': params['data-player'],
|
||||||
data = compat_urllib_parse_urlencode({'videoId': video_id,
|
'target': params['id'],
|
||||||
'playerId': player_id,
|
})
|
||||||
'target': target,
|
video_id = query['videoId']
|
||||||
})
|
video_info = None
|
||||||
base_info_url = self._search_regex(r'url = [\'"](.+?)[\'"][,;]',
|
info_page = self._download_webpage(
|
||||||
webpage, 'base info url',
|
'http://player.cnevids.com/player/video.js',
|
||||||
default='http://player.cnevids.com/player/loader.js?')
|
video_id, 'Downloading video info', query=query, fatal=False)
|
||||||
info_url = base_info_url + data
|
if info_page:
|
||||||
info_page = self._download_webpage(info_url, video_id,
|
video_info = self._parse_json(self._search_regex(
|
||||||
'Downloading video info')
|
r'loadCallback\(({.+})\)', info_page, 'video info'), video_id)['video']
|
||||||
video_info = self._search_regex(r'var\s+video\s*=\s*({.+?});', info_page, 'video info')
|
else:
|
||||||
video_info = self._parse_json(video_info, video_id)
|
info_page = self._download_webpage(
|
||||||
|
'http://player.cnevids.com/player/loader.js',
|
||||||
|
video_id, 'Downloading loader info', query=query)
|
||||||
|
video_info = self._parse_json(self._search_regex(
|
||||||
|
r'var\s+video\s*=\s*({.+?});', info_page, 'video info'), video_id)
|
||||||
|
title = video_info['title']
|
||||||
|
|
||||||
formats = [{
|
formats = []
|
||||||
'format_id': '%s-%s' % (fdata['type'].split('/')[-1], fdata['quality']),
|
for fdata in video_info.get('sources', [{}])[0]:
|
||||||
'url': fdata['src'],
|
src = fdata.get('src')
|
||||||
'ext': fdata['type'].split('/')[-1],
|
if not src:
|
||||||
'quality': 1 if fdata['quality'] == 'high' else 0,
|
continue
|
||||||
} for fdata in video_info['sources'][0]]
|
ext = mimetype2ext(fdata.get('type')) or determine_ext(src)
|
||||||
|
quality = fdata.get('quality')
|
||||||
|
formats.append({
|
||||||
|
'format_id': ext + ('-%s' % quality if quality else ''),
|
||||||
|
'url': src,
|
||||||
|
'ext': ext,
|
||||||
|
'quality': 1 if quality == 'high' else 0,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
info = self._search_json_ld(
|
||||||
|
webpage, video_id, fatal=False) if url_type != 'embed' else {}
|
||||||
|
info.update({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'title': video_info['title'],
|
'title': title,
|
||||||
'thumbnail': video_info['poster_frame'],
|
'thumbnail': video_info.get('poster_frame'),
|
||||||
'description': description,
|
'uploader': video_info.get('brand'),
|
||||||
}
|
'duration': int_or_none(video_info.get('duration')),
|
||||||
|
'tags': video_info.get('tags'),
|
||||||
|
'series': video_info.get('series_title'),
|
||||||
|
'season': video_info.get('season_title'),
|
||||||
|
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
site, url_type, item_id = re.match(self._VALID_URL, url).groups()
|
||||||
site = mobj.group('site')
|
|
||||||
url_type = mobj.group('type')
|
|
||||||
item_id = mobj.group('id')
|
|
||||||
|
|
||||||
# Convert JS embed to regular embed
|
# Convert JS embed to regular embed
|
||||||
if url_type == 'embedjs':
|
if url_type == 'embedjs':
|
||||||
|
@ -28,7 +28,8 @@ class CWTVIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'skip': 'redirect to http://cwtv.com/shows/arrow/',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
|
'url': 'http://www.cwseed.com/shows/whose-line-is-it-anyway/jeff-davis-4/?play=24282b12-ead2-42f2-95ad-26770c2c6088',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -44,10 +45,6 @@ class CWTVIE(InfoExtractor):
|
|||||||
'upload_date': '20151006',
|
'upload_date': '20151006',
|
||||||
'timestamp': 1444107300,
|
'timestamp': 1444107300,
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
'url': 'http://cwtv.com/thecw/chroniclesofcisco/?play=8adebe35-f447-465f-ab52-e863506ff6d6',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -61,11 +58,30 @@ class CWTVIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_data = self._download_json(
|
video_data = None
|
||||||
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/132?format=json' % video_id, video_id)
|
formats = []
|
||||||
|
for partner in (154, 213):
|
||||||
formats = self._extract_m3u8_formats(
|
vdata = self._download_json(
|
||||||
video_data['videos']['variantplaylist']['uri'], video_id, 'mp4')
|
'http://metaframe.digitalsmiths.tv/v2/CWtv/assets/%s/partner/%d?format=json' % (video_id, partner), video_id, fatal=False)
|
||||||
|
if not vdata:
|
||||||
|
continue
|
||||||
|
video_data = vdata
|
||||||
|
for quality, quality_data in vdata.get('videos', {}).items():
|
||||||
|
quality_url = quality_data.get('uri')
|
||||||
|
if not quality_url:
|
||||||
|
continue
|
||||||
|
if quality == 'variantplaylist':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
quality_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
|
else:
|
||||||
|
tbr = int_or_none(quality_data.get('bitrate'))
|
||||||
|
format_id = 'http' + ('-%d' % tbr if tbr else '')
|
||||||
|
if self._is_valid_url(quality_url, video_id, format_id):
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': quality_url,
|
||||||
|
'tbr': tbr,
|
||||||
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnails = [{
|
thumbnails = [{
|
||||||
|
98
youtube_dl/extractor/discoverygo.py
Normal file
98
youtube_dl/extractor/discoverygo.py
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
parse_age_limit,
|
||||||
|
unescapeHTML,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DiscoveryGoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '57a33c536b66d1cd0345eeb1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Kiss First, Ask Questions Later!',
|
||||||
|
'description': 'md5:fe923ba34050eae468bffae10831cb22',
|
||||||
|
'duration': 2579,
|
||||||
|
'series': 'Love at First Kiss',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode_number': 1,
|
||||||
|
'age_limit': 14,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
container = extract_attributes(
|
||||||
|
self._search_regex(
|
||||||
|
r'(<div[^>]+class=["\']video-player-container[^>]+>)',
|
||||||
|
webpage, 'video container'))
|
||||||
|
|
||||||
|
video = self._parse_json(
|
||||||
|
unescapeHTML(container.get('data-video') or container.get('data-json')),
|
||||||
|
display_id)
|
||||||
|
|
||||||
|
title = video['name']
|
||||||
|
|
||||||
|
stream = video['stream']
|
||||||
|
STREAM_URL_SUFFIX = 'streamUrl'
|
||||||
|
formats = []
|
||||||
|
for stream_kind in ('', 'hds'):
|
||||||
|
suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX
|
||||||
|
stream_url = stream.get('%s%s' % (stream_kind, suffix))
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
|
if stream_kind == '':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif stream_kind == 'hds':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
stream_url, display_id, f4m_id=stream_kind, fatal=False))
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
video_id = video.get('id') or display_id
|
||||||
|
description = video.get('description', {}).get('detailed')
|
||||||
|
duration = int_or_none(video.get('duration'))
|
||||||
|
|
||||||
|
series = video.get('show', {}).get('name')
|
||||||
|
season_number = int_or_none(video.get('season', {}).get('number'))
|
||||||
|
episode_number = int_or_none(video.get('episodeNumber'))
|
||||||
|
|
||||||
|
tags = video.get('tags')
|
||||||
|
age_limit = parse_age_limit(video.get('parental', {}).get('rating'))
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
captions = stream.get('captions')
|
||||||
|
if isinstance(captions, list):
|
||||||
|
for caption in captions:
|
||||||
|
subtitle_url = caption.get('fileUrl')
|
||||||
|
if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
|
||||||
|
not subtitle_url.startswith('http')):
|
||||||
|
continue
|
||||||
|
lang = caption.get('fileLang', 'en')
|
||||||
|
subtitles.setdefault(lang, []).append({'url': subtitle_url})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'series': series,
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
'tags': tags,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
@ -4,9 +4,10 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class EngadgetIE(InfoExtractor):
|
class EngadgetIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://www.engadget.com/video/(?P<id>[^/?#]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# video with 5min ID
|
||||||
'url': 'http://www.engadget.com/video/518153925/',
|
'url': 'http://www.engadget.com/video/518153925/',
|
||||||
'md5': 'c6820d4828a5064447a4d9fc73f312c9',
|
'md5': 'c6820d4828a5064447a4d9fc73f312c9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -15,8 +16,12 @@ class EngadgetIE(InfoExtractor):
|
|||||||
'title': 'Samsung Galaxy Tab Pro 8.4 Review',
|
'title': 'Samsung Galaxy Tab Pro 8.4 Review',
|
||||||
},
|
},
|
||||||
'add_ie': ['FiveMin'],
|
'add_ie': ['FiveMin'],
|
||||||
}
|
}, {
|
||||||
|
# video with vidible ID
|
||||||
|
'url': 'https://www.engadget.com/video/57a28462134aa15a39f0421a/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
return self.url_result('5min:%s' % video_id)
|
return self.url_result('aol-video:%s' % video_id)
|
||||||
|
@ -221,6 +221,7 @@ from .dvtv import DVTVIE
|
|||||||
from .dumpert import DumpertIE
|
from .dumpert import DumpertIE
|
||||||
from .defense import DefenseGouvFrIE
|
from .defense import DefenseGouvFrIE
|
||||||
from .discovery import DiscoveryIE
|
from .discovery import DiscoveryIE
|
||||||
|
from .discoverygo import DiscoveryGoIE
|
||||||
from .dispeak import DigitallySpeakingIE
|
from .dispeak import DigitallySpeakingIE
|
||||||
from .dropbox import DropboxIE
|
from .dropbox import DropboxIE
|
||||||
from .dw import (
|
from .dw import (
|
||||||
@ -290,7 +291,6 @@ from .funimation import FunimationIE
|
|||||||
from .funnyordie import FunnyOrDieIE
|
from .funnyordie import FunnyOrDieIE
|
||||||
from .fusion import FusionIE
|
from .fusion import FusionIE
|
||||||
from .gameinformer import GameInformerIE
|
from .gameinformer import GameInformerIE
|
||||||
from .gamekings import GamekingsIE
|
|
||||||
from .gameone import (
|
from .gameone import (
|
||||||
GameOneIE,
|
GameOneIE,
|
||||||
GameOnePlaylistIE,
|
GameOnePlaylistIE,
|
||||||
@ -492,8 +492,9 @@ from .myvi import MyviIE
|
|||||||
from .myvideo import MyVideoIE
|
from .myvideo import MyVideoIE
|
||||||
from .myvidster import MyVidsterIE
|
from .myvidster import MyVidsterIE
|
||||||
from .nationalgeographic import (
|
from .nationalgeographic import (
|
||||||
|
NationalGeographicVideoIE,
|
||||||
NationalGeographicIE,
|
NationalGeographicIE,
|
||||||
NationalGeographicChannelIE,
|
NationalGeographicEpisodeGuideIE,
|
||||||
)
|
)
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
@ -636,6 +637,7 @@ from .pluralsight import (
|
|||||||
PluralsightCourseIE,
|
PluralsightCourseIE,
|
||||||
)
|
)
|
||||||
from .podomatic import PodomaticIE
|
from .podomatic import PodomaticIE
|
||||||
|
from .pokemon import PokemonIE
|
||||||
from .polskieradio import PolskieRadioIE
|
from .polskieradio import PolskieRadioIE
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
from .pornhd import PornHdIE
|
from .pornhd import PornHdIE
|
||||||
@ -694,6 +696,7 @@ from .rockstargames import RockstarGamesIE
|
|||||||
from .roosterteeth import RoosterTeethIE
|
from .roosterteeth import RoosterTeethIE
|
||||||
from .rottentomatoes import RottenTomatoesIE
|
from .rottentomatoes import RottenTomatoesIE
|
||||||
from .roxwel import RoxwelIE
|
from .roxwel import RoxwelIE
|
||||||
|
from .rozhlas import RozhlasIE
|
||||||
from .rtbf import RTBFIE
|
from .rtbf import RTBFIE
|
||||||
from .rte import RteIE, RteRadioIE
|
from .rte import RteIE, RteRadioIE
|
||||||
from .rtlnl import RtlNlIE
|
from .rtlnl import RtlNlIE
|
||||||
@ -753,6 +756,7 @@ from .smotri import (
|
|||||||
)
|
)
|
||||||
from .snotr import SnotrIE
|
from .snotr import SnotrIE
|
||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
|
from .sonyliv import SonyLIVIE
|
||||||
from .soundcloud import (
|
from .soundcloud import (
|
||||||
SoundcloudIE,
|
SoundcloudIE,
|
||||||
SoundcloudSetIE,
|
SoundcloudSetIE,
|
||||||
@ -892,6 +896,7 @@ from .tvc import (
|
|||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvland import TVLandIE
|
from .tvland import TVLandIE
|
||||||
from .tvp import (
|
from .tvp import (
|
||||||
|
TVPEmbedIE,
|
||||||
TVPIE,
|
TVPIE,
|
||||||
TVPSeriesIE,
|
TVPSeriesIE,
|
||||||
)
|
)
|
||||||
@ -1005,6 +1010,7 @@ from .vk import (
|
|||||||
)
|
)
|
||||||
from .vlive import VLiveIE
|
from .vlive import VLiveIE
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
|
from .vodplatform import VODPlatformIE
|
||||||
from .voicerepublic import VoiceRepublicIE
|
from .voicerepublic import VoiceRepublicIE
|
||||||
from .voxmedia import VoxMediaIE
|
from .voxmedia import VoxMediaIE
|
||||||
from .vporn import VpornIE
|
from .vporn import VpornIE
|
||||||
|
@ -1,24 +1,11 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_parse_qs,
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
parse_duration,
|
|
||||||
replace_extension,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FiveMinIE(InfoExtractor):
|
class FiveMinIE(InfoExtractor):
|
||||||
IE_NAME = '5min'
|
IE_NAME = '5min'
|
||||||
_VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
|
_VALID_URL = r'(?:5min:|https?://(?:[^/]*?5min\.com/|delivery\.vidible\.tv/aol)(?:(?:Scripts/PlayerSeed\.js|playerseed/?)?\?.*?playList=)?)(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
@ -29,8 +16,16 @@ class FiveMinIE(InfoExtractor):
|
|||||||
'id': '518013791',
|
'id': '518013791',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'iPad Mini with Retina Display Review',
|
'title': 'iPad Mini with Retina Display Review',
|
||||||
|
'description': 'iPad mini with Retina Display review',
|
||||||
'duration': 177,
|
'duration': 177,
|
||||||
|
'uploader': 'engadget',
|
||||||
|
'upload_date': '20131115',
|
||||||
|
'timestamp': 1384515288,
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
|
# From http://on.aol.com/video/how-to-make-a-next-level-fruit-salad-518086247
|
||||||
@ -44,108 +39,16 @@ class FiveMinIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'skip': 'no longer available',
|
'skip': 'no longer available',
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://embed.5min.com/518726732/',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://delivery.vidible.tv/aol?playList=518013791',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
_ERRORS = {
|
|
||||||
'ErrorVideoNotExist': 'We\'re sorry, but the video you are trying to watch does not exist.',
|
|
||||||
'ErrorVideoNoLongerAvailable': 'We\'re sorry, but the video you are trying to watch is no longer available.',
|
|
||||||
'ErrorVideoRejected': 'We\'re sorry, but the video you are trying to watch has been removed.',
|
|
||||||
'ErrorVideoUserNotGeo': 'We\'re sorry, but the video you are trying to watch cannot be viewed from your current location.',
|
|
||||||
'ErrorVideoLibraryRestriction': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
|
|
||||||
'ErrorExposurePermission': 'We\'re sorry, but the video you are trying to watch is currently unavailable for viewing at this domain.',
|
|
||||||
}
|
|
||||||
_QUALITIES = {
|
|
||||||
1: {
|
|
||||||
'width': 640,
|
|
||||||
'height': 360,
|
|
||||||
},
|
|
||||||
2: {
|
|
||||||
'width': 854,
|
|
||||||
'height': 480,
|
|
||||||
},
|
|
||||||
4: {
|
|
||||||
'width': 1280,
|
|
||||||
'height': 720,
|
|
||||||
},
|
|
||||||
8: {
|
|
||||||
'width': 1920,
|
|
||||||
'height': 1080,
|
|
||||||
},
|
|
||||||
16: {
|
|
||||||
'width': 640,
|
|
||||||
'height': 360,
|
|
||||||
},
|
|
||||||
32: {
|
|
||||||
'width': 854,
|
|
||||||
'height': 480,
|
|
||||||
},
|
|
||||||
64: {
|
|
||||||
'width': 1280,
|
|
||||||
'height': 720,
|
|
||||||
},
|
|
||||||
128: {
|
|
||||||
'width': 640,
|
|
||||||
'height': 360,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
return self.url_result('aol-video:%s' % video_id)
|
||||||
sid = mobj.group('sid')
|
|
||||||
|
|
||||||
if mobj.group('query'):
|
|
||||||
qs = compat_parse_qs(mobj.group('query'))
|
|
||||||
if not qs.get('playList'):
|
|
||||||
raise ExtractorError('Invalid URL', expected=True)
|
|
||||||
video_id = qs['playList'][0]
|
|
||||||
if qs.get('sid'):
|
|
||||||
sid = qs['sid'][0]
|
|
||||||
|
|
||||||
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
|
|
||||||
if not sid:
|
|
||||||
embed_page = self._download_webpage(embed_url, video_id,
|
|
||||||
'Downloading embed page')
|
|
||||||
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
|
|
||||||
|
|
||||||
response = self._download_json(
|
|
||||||
'https://syn.5min.com/handlers/SenseHandler.ashx?' +
|
|
||||||
compat_urllib_parse_urlencode({
|
|
||||||
'func': 'GetResults',
|
|
||||||
'playlist': video_id,
|
|
||||||
'sid': sid,
|
|
||||||
'isPlayerSeed': 'true',
|
|
||||||
'url': embed_url,
|
|
||||||
}),
|
|
||||||
video_id)
|
|
||||||
if not response['success']:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s said: %s' % (
|
|
||||||
self.IE_NAME,
|
|
||||||
self._ERRORS.get(response['errorMessage'], response['errorMessage'])),
|
|
||||||
expected=True)
|
|
||||||
info = response['binding'][0]
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
|
|
||||||
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
|
|
||||||
for rendition in info['Renditions']:
|
|
||||||
if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
|
|
||||||
quality = self._QUALITIES.get(rendition['ID'], {})
|
|
||||||
formats.append({
|
|
||||||
'format_id': '%s-%d' % (rendition['RenditionType'], rendition['ID']),
|
|
||||||
'url': rendition_url,
|
|
||||||
'width': quality.get('width'),
|
|
||||||
'height': quality.get('height'),
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': info['Title'],
|
|
||||||
'thumbnail': info.get('ThumbURL'),
|
|
||||||
'duration': parse_duration(info.get('Duration')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
@ -48,7 +48,7 @@ class FlipagramIE(InfoExtractor):
|
|||||||
flipagram = video_data['flipagram']
|
flipagram = video_data['flipagram']
|
||||||
video = flipagram['video']
|
video = flipagram['video']
|
||||||
|
|
||||||
json_ld = self._search_json_ld(webpage, video_id, default=False)
|
json_ld = self._search_json_ld(webpage, video_id, default={})
|
||||||
title = json_ld.get('title') or flipagram['captionText']
|
title = json_ld.get('title') or flipagram['captionText']
|
||||||
description = json_ld.get('description') or flipagram.get('captionText')
|
description = json_ld.get('description') or flipagram.get('captionText')
|
||||||
|
|
||||||
|
@ -2,7 +2,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..utils import (
|
||||||
|
smuggle_url,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class FOXIE(InfoExtractor):
|
class FOXIE(InfoExtractor):
|
||||||
@ -29,11 +32,12 @@ class FOXIE(InfoExtractor):
|
|||||||
|
|
||||||
release_url = self._parse_json(self._search_regex(
|
release_url = self._parse_json(self._search_regex(
|
||||||
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'),
|
||||||
video_id)['release_url'] + '&switch=http'
|
video_id)['release_url']
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'ThePlatform',
|
'ie_key': 'ThePlatform',
|
||||||
'url': smuggle_url(release_url, {'force_smil_url': True}),
|
'url': smuggle_url(update_url_query(
|
||||||
|
release_url, {'switch': 'http'}), {'force_smil_url': True}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
@ -1,76 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
xpath_text,
|
|
||||||
xpath_with_ns,
|
|
||||||
)
|
|
||||||
from .youtube import YoutubeIE
|
|
||||||
|
|
||||||
|
|
||||||
class GamekingsIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
# YouTube embed video
|
|
||||||
'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',
|
|
||||||
'md5': '5208d3a17adeaef829a7861887cb9029',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'HkSQKetlGOU',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Phoenix Wright: Ace Attorney - Dual Destinies Review',
|
|
||||||
'description': 'md5:db88c0e7f47e9ea50df3271b9dc72e1d',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
'uploader_id': 'UCJugRGo4STYMeFr5RoOShtQ',
|
|
||||||
'uploader': 'Gamekings Vault',
|
|
||||||
'upload_date': '20151123',
|
|
||||||
},
|
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
}, {
|
|
||||||
# vimeo video
|
|
||||||
'url': 'http://www.gamekings.nl/videos/the-legend-of-zelda-majoras-mask/',
|
|
||||||
'md5': '12bf04dfd238e70058046937657ea68d',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'the-legend-of-zelda-majoras-mask',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'The Legend of Zelda: Majora’s Mask',
|
|
||||||
'description': 'md5:9917825fe0e9f4057601fe1e38860de3',
|
|
||||||
'thumbnail': 're:^https?://.*\.jpg$',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.gamekings.nl/nieuws/gamekings-extra-shelly-en-david-bereiden-zich-voor-op-de-livestream/',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
playlist_id = self._search_regex(
|
|
||||||
r'gogoVideo\([^,]+,\s*"([^"]+)', webpage, 'playlist id')
|
|
||||||
|
|
||||||
# Check if a YouTube embed is used
|
|
||||||
if YoutubeIE.suitable(playlist_id):
|
|
||||||
return self.url_result(playlist_id, ie='Youtube')
|
|
||||||
|
|
||||||
playlist = self._download_xml(
|
|
||||||
'http://www.gamekings.tv/wp-content/themes/gk2010/rss_playlist.php?id=%s' % playlist_id,
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
NS_MAP = {
|
|
||||||
'jwplayer': 'http://rss.jwpcdn.com/'
|
|
||||||
}
|
|
||||||
|
|
||||||
item = playlist.find('./channel/item')
|
|
||||||
|
|
||||||
thumbnail = xpath_text(item, xpath_with_ns('./jwplayer:image', NS_MAP), 'thumbnail')
|
|
||||||
video_url = item.find(xpath_with_ns('./jwplayer:source', NS_MAP)).get('file')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': self._og_search_title(webpage),
|
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
}
|
|
@ -2207,6 +2207,14 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(
|
return self.url_result(
|
||||||
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
|
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
|
||||||
|
|
||||||
|
# Look for VODPlatform embeds
|
||||||
|
mobj = re.search(
|
||||||
|
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vod-platform\.net/embed/[^/?#]+)',
|
||||||
|
webpage)
|
||||||
|
if mobj is not None:
|
||||||
|
return self.url_result(
|
||||||
|
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'VODPlatform')
|
||||||
|
|
||||||
# Look for Instagram embeds
|
# Look for Instagram embeds
|
||||||
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
|
||||||
if instagram_embed_url is not None:
|
if instagram_embed_url is not None:
|
||||||
@ -2233,8 +2241,8 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
# Looking for http://schema.org/VideoObject
|
# Looking for http://schema.org/VideoObject
|
||||||
json_ld = self._search_json_ld(
|
json_ld = self._search_json_ld(
|
||||||
webpage, video_id, default=None, expected_type='VideoObject')
|
webpage, video_id, default={}, expected_type='VideoObject')
|
||||||
if json_ld and json_ld.get('url'):
|
if json_ld.get('url'):
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'title': video_title or info_dict['title'],
|
'title': video_title or info_dict['title'],
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
|
@ -4,10 +4,12 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -28,74 +30,84 @@ class JWPlatformBaseIE(InfoExtractor):
|
|||||||
return self._parse_jwplayer_data(
|
return self._parse_jwplayer_data(
|
||||||
jwplayer_data, video_id, *args, **kwargs)
|
jwplayer_data, video_id, *args, **kwargs)
|
||||||
|
|
||||||
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
|
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None, base_url=None):
|
||||||
# JWPlayer backward compatibility: flattened playlists
|
# JWPlayer backward compatibility: flattened playlists
|
||||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
|
||||||
if 'playlist' not in jwplayer_data:
|
if 'playlist' not in jwplayer_data:
|
||||||
jwplayer_data = {'playlist': [jwplayer_data]}
|
jwplayer_data = {'playlist': [jwplayer_data]}
|
||||||
|
|
||||||
video_data = jwplayer_data['playlist'][0]
|
entries = []
|
||||||
|
for video_data in jwplayer_data['playlist']:
|
||||||
|
# JWPlayer backward compatibility: flattened sources
|
||||||
|
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
||||||
|
if 'sources' not in video_data:
|
||||||
|
video_data['sources'] = [video_data]
|
||||||
|
|
||||||
# JWPlayer backward compatibility: flattened sources
|
formats = []
|
||||||
# https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
|
for source in video_data['sources']:
|
||||||
if 'sources' not in video_data:
|
source_url = self._proto_relative_url(source['file'])
|
||||||
video_data['sources'] = [video_data]
|
if base_url:
|
||||||
|
source_url = compat_urlparse.urljoin(base_url, source_url)
|
||||||
formats = []
|
source_type = source.get('type') or ''
|
||||||
for source in video_data['sources']:
|
ext = mimetype2ext(source_type) or determine_ext(source_url)
|
||||||
source_url = self._proto_relative_url(source['file'])
|
if source_type == 'hls' or ext == 'm3u8':
|
||||||
source_type = source.get('type') or ''
|
formats.extend(self._extract_m3u8_formats(
|
||||||
if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8':
|
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
||||||
formats.extend(self._extract_m3u8_formats(
|
# https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
|
||||||
source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
|
elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
|
||||||
elif source_type.startswith('audio'):
|
formats.append({
|
||||||
formats.append({
|
'url': source_url,
|
||||||
'url': source_url,
|
'vcodec': 'none',
|
||||||
'vcodec': 'none',
|
'ext': ext,
|
||||||
})
|
|
||||||
else:
|
|
||||||
a_format = {
|
|
||||||
'url': source_url,
|
|
||||||
'width': int_or_none(source.get('width')),
|
|
||||||
'height': int_or_none(source.get('height')),
|
|
||||||
}
|
|
||||||
if source_url.startswith('rtmp'):
|
|
||||||
a_format['ext'] = 'flv',
|
|
||||||
|
|
||||||
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
|
||||||
# of jwplayer.flash.swf
|
|
||||||
rtmp_url_parts = re.split(
|
|
||||||
r'((?:mp4|mp3|flv):)', source_url, 1)
|
|
||||||
if len(rtmp_url_parts) == 3:
|
|
||||||
rtmp_url, prefix, play_path = rtmp_url_parts
|
|
||||||
a_format.update({
|
|
||||||
'url': rtmp_url,
|
|
||||||
'play_path': prefix + play_path,
|
|
||||||
})
|
|
||||||
if rtmp_params:
|
|
||||||
a_format.update(rtmp_params)
|
|
||||||
formats.append(a_format)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
tracks = video_data.get('tracks')
|
|
||||||
if tracks and isinstance(tracks, list):
|
|
||||||
for track in tracks:
|
|
||||||
if track.get('file') and track.get('kind') == 'captions':
|
|
||||||
subtitles.setdefault(track.get('label') or 'en', []).append({
|
|
||||||
'url': self._proto_relative_url(track['file'])
|
|
||||||
})
|
})
|
||||||
|
else:
|
||||||
|
a_format = {
|
||||||
|
'url': source_url,
|
||||||
|
'width': int_or_none(source.get('width')),
|
||||||
|
'height': int_or_none(source.get('height')),
|
||||||
|
'ext': ext,
|
||||||
|
}
|
||||||
|
if source_url.startswith('rtmp'):
|
||||||
|
a_format['ext'] = 'flv',
|
||||||
|
|
||||||
return {
|
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
|
||||||
'id': video_id,
|
# of jwplayer.flash.swf
|
||||||
'title': video_data['title'] if require_title else video_data.get('title'),
|
rtmp_url_parts = re.split(
|
||||||
'description': video_data.get('description'),
|
r'((?:mp4|mp3|flv):)', source_url, 1)
|
||||||
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
if len(rtmp_url_parts) == 3:
|
||||||
'timestamp': int_or_none(video_data.get('pubdate')),
|
rtmp_url, prefix, play_path = rtmp_url_parts
|
||||||
'duration': float_or_none(jwplayer_data.get('duration')),
|
a_format.update({
|
||||||
'subtitles': subtitles,
|
'url': rtmp_url,
|
||||||
'formats': formats,
|
'play_path': prefix + play_path,
|
||||||
}
|
})
|
||||||
|
if rtmp_params:
|
||||||
|
a_format.update(rtmp_params)
|
||||||
|
formats.append(a_format)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
tracks = video_data.get('tracks')
|
||||||
|
if tracks and isinstance(tracks, list):
|
||||||
|
for track in tracks:
|
||||||
|
if track.get('file') and track.get('kind') == 'captions':
|
||||||
|
subtitles.setdefault(track.get('label') or 'en', []).append({
|
||||||
|
'url': self._proto_relative_url(track['file'])
|
||||||
|
})
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_data['title'] if require_title else video_data.get('title'),
|
||||||
|
'description': video_data.get('description'),
|
||||||
|
'thumbnail': self._proto_relative_url(video_data.get('image')),
|
||||||
|
'timestamp': int_or_none(video_data.get('pubdate')),
|
||||||
|
'duration': float_or_none(jwplayer_data.get('duration')),
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'formats': formats,
|
||||||
|
})
|
||||||
|
if len(entries) == 1:
|
||||||
|
return entries[0]
|
||||||
|
else:
|
||||||
|
return self.playlist_result(entries)
|
||||||
|
|
||||||
|
|
||||||
class JWPlatformIE(JWPlatformBaseIE):
|
class JWPlatformIE(JWPlatformBaseIE):
|
||||||
|
@ -62,6 +62,11 @@ class KalturaIE(InfoExtractor):
|
|||||||
{
|
{
|
||||||
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
|
'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# video with subtitles
|
||||||
|
'url': 'kaltura:111032:1_cw786r8q',
|
||||||
|
'only_matching': True,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -130,7 +135,6 @@ class KalturaIE(InfoExtractor):
|
|||||||
video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
|
video_id, actions, service_url, note='Downloading Kaltura signature')['ks']
|
||||||
|
|
||||||
def _get_video_info(self, video_id, partner_id, service_url=None):
|
def _get_video_info(self, video_id, partner_id, service_url=None):
|
||||||
signature = self._get_kaltura_signature(video_id, partner_id, service_url)
|
|
||||||
actions = [
|
actions = [
|
||||||
{
|
{
|
||||||
'action': 'null',
|
'action': 'null',
|
||||||
@ -138,18 +142,30 @@ class KalturaIE(InfoExtractor):
|
|||||||
'clientTag': 'kdp:v3.8.5',
|
'clientTag': 'kdp:v3.8.5',
|
||||||
'format': 1, # JSON, 2 = XML, 3 = PHP
|
'format': 1, # JSON, 2 = XML, 3 = PHP
|
||||||
'service': 'multirequest',
|
'service': 'multirequest',
|
||||||
'ks': signature,
|
},
|
||||||
|
{
|
||||||
|
'expiry': 86400,
|
||||||
|
'service': 'session',
|
||||||
|
'action': 'startWidgetSession',
|
||||||
|
'widgetId': '_%s' % partner_id,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'action': 'get',
|
'action': 'get',
|
||||||
'entryId': video_id,
|
'entryId': video_id,
|
||||||
'service': 'baseentry',
|
'service': 'baseentry',
|
||||||
'version': '-1',
|
'ks': '{1:result:ks}',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'action': 'getbyentryid',
|
'action': 'getbyentryid',
|
||||||
'entryId': video_id,
|
'entryId': video_id,
|
||||||
'service': 'flavorAsset',
|
'service': 'flavorAsset',
|
||||||
|
'ks': '{1:result:ks}',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'action': 'list',
|
||||||
|
'filter:entryIdEqual': video_id,
|
||||||
|
'service': 'caption_captionasset',
|
||||||
|
'ks': '{1:result:ks}',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
return self._kaltura_api_call(
|
return self._kaltura_api_call(
|
||||||
@ -161,8 +177,9 @@ class KalturaIE(InfoExtractor):
|
|||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
partner_id, entry_id = mobj.group('partner_id', 'id')
|
partner_id, entry_id = mobj.group('partner_id', 'id')
|
||||||
ks = None
|
ks = None
|
||||||
|
captions = None
|
||||||
if partner_id and entry_id:
|
if partner_id and entry_id:
|
||||||
info, flavor_assets = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
|
_, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
|
||||||
else:
|
else:
|
||||||
path, query = mobj.group('path', 'query')
|
path, query = mobj.group('path', 'query')
|
||||||
if not path and not query:
|
if not path and not query:
|
||||||
@ -181,7 +198,7 @@ class KalturaIE(InfoExtractor):
|
|||||||
raise ExtractorError('Invalid URL', expected=True)
|
raise ExtractorError('Invalid URL', expected=True)
|
||||||
if 'entry_id' in params:
|
if 'entry_id' in params:
|
||||||
entry_id = params['entry_id'][0]
|
entry_id = params['entry_id'][0]
|
||||||
info, flavor_assets = self._get_video_info(entry_id, partner_id)
|
_, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id)
|
||||||
elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
|
elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
|
||||||
reference_id = params['flashvars[referenceId]'][0]
|
reference_id = params['flashvars[referenceId]'][0]
|
||||||
webpage = self._download_webpage(url, reference_id)
|
webpage = self._download_webpage(url, reference_id)
|
||||||
@ -217,7 +234,7 @@ class KalturaIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for f in flavor_assets:
|
for f in flavor_assets:
|
||||||
# Continue if asset is not ready
|
# Continue if asset is not ready
|
||||||
if f['status'] != 2:
|
if f.get('status') != 2:
|
||||||
continue
|
continue
|
||||||
video_url = sign_url(
|
video_url = sign_url(
|
||||||
'%s/flavorId/%s' % (data_url, f['id']))
|
'%s/flavorId/%s' % (data_url, f['id']))
|
||||||
@ -240,13 +257,24 @@ class KalturaIE(InfoExtractor):
|
|||||||
m3u8_url, entry_id, 'mp4', 'm3u8_native',
|
m3u8_url, entry_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
self._check_formats(formats, entry_id)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
if captions:
|
||||||
|
for caption in captions.get('objects', []):
|
||||||
|
# Continue if caption is not ready
|
||||||
|
if f.get('status') != 2:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({
|
||||||
|
'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']),
|
||||||
|
'ext': caption.get('fileExt'),
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': entry_id,
|
'id': entry_id,
|
||||||
'title': info['name'],
|
'title': info['name'],
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
'description': clean_html(info.get('description')),
|
'description': clean_html(info.get('description')),
|
||||||
'thumbnail': info.get('thumbnailUrl'),
|
'thumbnail': info.get('thumbnailUrl'),
|
||||||
'duration': info.get('duration'),
|
'duration': info.get('duration'),
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urlparse
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
clean_html,
|
clean_html,
|
||||||
@ -242,8 +243,9 @@ class KuwoSingerIE(InfoExtractor):
|
|||||||
query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
|
query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
|
||||||
|
|
||||||
return [
|
return [
|
||||||
self.url_result(song_url, 'Kuwo') for song_url in re.findall(
|
self.url_result(compat_urlparse.urljoin(url, song_url), 'Kuwo')
|
||||||
r'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)',
|
for song_url in re.findall(
|
||||||
|
r'<div[^>]+class="name"><a[^>]+href="(/yinyue/\d+)',
|
||||||
webpage)
|
webpage)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -53,11 +53,17 @@ class LimelightBaseIE(InfoExtractor):
|
|||||||
'height': int_or_none(stream.get('videoHeightInPixels')),
|
'height': int_or_none(stream.get('videoHeightInPixels')),
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
}
|
}
|
||||||
rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
|
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp4:.+)$', stream_url)
|
||||||
if rtmp:
|
if rtmp:
|
||||||
format_id = 'rtmp'
|
format_id = 'rtmp'
|
||||||
if stream.get('videoBitRate'):
|
if stream.get('videoBitRate'):
|
||||||
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
format_id += '-%d' % int_or_none(stream['videoBitRate'])
|
||||||
|
http_fmt = fmt.copy()
|
||||||
|
http_fmt.update({
|
||||||
|
'url': 'http://%s/%s' % (rtmp.group('host').replace('csl.', 'cpl.'), rtmp.group('playpath')[4:]),
|
||||||
|
'format_id': format_id.replace('rtmp', 'http'),
|
||||||
|
})
|
||||||
|
formats.append(http_fmt)
|
||||||
fmt.update({
|
fmt.update({
|
||||||
'url': rtmp.group('url'),
|
'url': rtmp.group('url'),
|
||||||
'play_path': rtmp.group('playpath'),
|
'play_path': rtmp.group('playpath'),
|
||||||
@ -166,9 +172,10 @@ class LimelightMediaIE(LimelightBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
# video with subtitles
|
# video with subtitles
|
||||||
'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
|
'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
|
||||||
|
'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'a3e00274d4564ec4a9b29b9466432335',
|
'id': 'a3e00274d4564ec4a9b29b9466432335',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': '3Play Media Overview Video',
|
'title': '3Play Media Overview Video',
|
||||||
'thumbnail': 're:^https?://.*\.jpeg$',
|
'thumbnail': 're:^https?://.*\.jpeg$',
|
||||||
'duration': 78.101,
|
'duration': 78.101,
|
||||||
@ -176,10 +183,6 @@ class LimelightMediaIE(LimelightBaseIE):
|
|||||||
'upload_date': '20120605',
|
'upload_date': '20120605',
|
||||||
'subtitles': 'mincount:9',
|
'subtitles': 'mincount:9',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
|
'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -1,16 +1,19 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
url_basename,
|
url_basename,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
|
get_element_by_class,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NationalGeographicIE(InfoExtractor):
|
class NationalGeographicVideoIE(InfoExtractor):
|
||||||
IE_NAME = 'natgeo'
|
IE_NAME = 'natgeo:video'
|
||||||
_VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
|
_VALID_URL = r'https?://video\.nationalgeographic\.com/.*?'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
@ -62,16 +65,16 @@ class NationalGeographicIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class NationalGeographicChannelIE(ThePlatformIE):
|
class NationalGeographicIE(ThePlatformIE):
|
||||||
IE_NAME = 'natgeo:channel'
|
IE_NAME = 'natgeo'
|
||||||
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/videos/(?P<id>[^/?]+)'
|
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?[^/]+/(?:videos|episodes)/(?P<id>[^/?]+)'
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/videos/uncovering-a-universal-knowledge/',
|
||||||
'md5': '518c9aa655686cf81493af5cc21e2a04',
|
'md5': '518c9aa655686cf81493af5cc21e2a04',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'nB5vIAfmyllm',
|
'id': 'vKInpacll2pC',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Uncovering a Universal Knowledge',
|
'title': 'Uncovering a Universal Knowledge',
|
||||||
'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
|
'description': 'md5:1a89148475bf931b3661fcd6ddb2ae3a',
|
||||||
@ -85,7 +88,7 @@ class NationalGeographicChannelIE(ThePlatformIE):
|
|||||||
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
'url': 'http://channel.nationalgeographic.com/wild/destination-wild/videos/the-stunning-red-bird-of-paradise/',
|
||||||
'md5': 'c4912f656b4cbe58f3e000c489360989',
|
'md5': 'c4912f656b4cbe58f3e000c489360989',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3TmMv9OvGwIR',
|
'id': 'Pok5lWCkiEFA',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The Stunning Red Bird of Paradise',
|
'title': 'The Stunning Red Bird of Paradise',
|
||||||
'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
|
'description': 'md5:7bc8cd1da29686be4d17ad1230f0140c',
|
||||||
@ -95,6 +98,10 @@ class NationalGeographicChannelIE(ThePlatformIE):
|
|||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episodes/the-power-of-miracles/',
|
||||||
|
'only_matching': True,
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -122,3 +129,40 @@ class NationalGeographicChannelIE(ThePlatformIE):
|
|||||||
{'force_smil_url': True}),
|
{'force_smil_url': True}),
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NationalGeographicEpisodeGuideIE(ThePlatformIE):
|
||||||
|
IE_NAME = 'natgeo:episodeguide'
|
||||||
|
_VALID_URL = r'https?://channel\.nationalgeographic\.com/(?:wild/)?(?P<id>[^/]+)/episode-guide'
|
||||||
|
_TESTS = [
|
||||||
|
{
|
||||||
|
'url': 'http://channel.nationalgeographic.com/the-story-of-god-with-morgan-freeman/episode-guide/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'the-story-of-god-with-morgan-freeman-season-1',
|
||||||
|
'title': 'The Story of God with Morgan Freeman - Season 1',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 6,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
'url': 'http://channel.nationalgeographic.com/underworld-inc/episode-guide/?s=2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'underworld-inc-season-2',
|
||||||
|
'title': 'Underworld, Inc. - Season 2',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 7,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
show = get_element_by_class('show', webpage)
|
||||||
|
selected_season = self._search_regex(
|
||||||
|
r'<div[^>]+class="select-seasons[^"]*".*?<a[^>]*>(.*?)</a>',
|
||||||
|
webpage, 'selected season')
|
||||||
|
entries = [
|
||||||
|
self.url_result(self._proto_relative_url(entry_url), 'NationalGeographic')
|
||||||
|
for entry_url in re.findall('(?s)<div[^>]+class="col-inner"[^>]*?>.*?<a[^>]+href="([^"]+)"', webpage)]
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, '%s-%s' % (display_id, selected_season.lower().replace(' ', '-')),
|
||||||
|
'%s - %s' % (show, selected_season))
|
||||||
|
@ -4,12 +4,10 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_urllib_parse_urlencode,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -51,48 +49,74 @@ class NaverIE(InfoExtractor):
|
|||||||
if error:
|
if error:
|
||||||
raise ExtractorError(error, expected=True)
|
raise ExtractorError(error, expected=True)
|
||||||
raise ExtractorError('couldn\'t extract vid and key')
|
raise ExtractorError('couldn\'t extract vid and key')
|
||||||
vid = m_id.group(1)
|
video_data = self._download_json(
|
||||||
key = m_id.group(2)
|
'http://play.rmcnmv.naver.com/vod/play/v2.0/' + m_id.group(1),
|
||||||
query = compat_urllib_parse_urlencode({'vid': vid, 'inKey': key, })
|
video_id, query={
|
||||||
query_urls = compat_urllib_parse_urlencode({
|
'key': m_id.group(2),
|
||||||
'masterVid': vid,
|
})
|
||||||
'protocol': 'p2p',
|
meta = video_data['meta']
|
||||||
'inKey': key,
|
title = meta['subject']
|
||||||
})
|
|
||||||
info = self._download_xml(
|
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
|
|
||||||
video_id, 'Downloading video info')
|
|
||||||
urls = self._download_xml(
|
|
||||||
'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
|
|
||||||
video_id, 'Downloading video formats info')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_el in urls.findall('EncodingOptions/EncodingOption'):
|
|
||||||
domain = format_el.find('Domain').text
|
def extract_formats(streams, stream_type, query={}):
|
||||||
uri = format_el.find('uri').text
|
for stream in streams:
|
||||||
f = {
|
stream_url = stream.get('source')
|
||||||
'url': compat_urlparse.urljoin(domain, uri),
|
if not stream_url:
|
||||||
'ext': 'mp4',
|
continue
|
||||||
'width': int(format_el.find('width').text),
|
stream_url = update_url_query(stream_url, query)
|
||||||
'height': int(format_el.find('height').text),
|
encoding_option = stream.get('encodingOption', {})
|
||||||
}
|
bitrate = stream.get('bitrate', {})
|
||||||
if domain.startswith('rtmp'):
|
formats.append({
|
||||||
# urlparse does not support custom schemes
|
'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')),
|
||||||
# https://bugs.python.org/issue18828
|
'url': stream_url,
|
||||||
f.update({
|
'width': int_or_none(encoding_option.get('width')),
|
||||||
'url': domain + uri,
|
'height': int_or_none(encoding_option.get('height')),
|
||||||
'ext': 'flv',
|
'vbr': int_or_none(bitrate.get('video')),
|
||||||
'rtmp_protocol': '1', # rtmpt
|
'abr': int_or_none(bitrate.get('audio')),
|
||||||
|
'filesize': int_or_none(stream.get('size')),
|
||||||
|
'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
|
||||||
})
|
})
|
||||||
formats.append(f)
|
|
||||||
|
extract_formats(video_data.get('videos', {}).get('list', []), 'H264')
|
||||||
|
for stream_set in video_data.get('streams', []):
|
||||||
|
query = {}
|
||||||
|
for param in stream_set.get('keys', []):
|
||||||
|
query[param['name']] = param['value']
|
||||||
|
stream_type = stream_set.get('type')
|
||||||
|
videos = stream_set.get('videos')
|
||||||
|
if videos:
|
||||||
|
extract_formats(videos, stream_type, query)
|
||||||
|
elif stream_type == 'HLS':
|
||||||
|
stream_url = stream_set.get('source')
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
update_url_query(stream_url, query), video_id,
|
||||||
|
'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for caption in video_data.get('captions', {}).get('list', []):
|
||||||
|
caption_url = caption.get('source')
|
||||||
|
if not caption_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({
|
||||||
|
'url': caption_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
upload_date = self._search_regex(
|
||||||
|
r'<span[^>]+class="date".*?(\d{4}\.\d{2}\.\d{2})',
|
||||||
|
webpage, 'upload date', fatal=False)
|
||||||
|
if upload_date:
|
||||||
|
upload_date = upload_date.replace('.', '')
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': info.find('Subject').text,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
'description': self._og_search_description(webpage),
|
'description': self._og_search_description(webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage),
|
||||||
'upload_date': info.find('WriteDate').text.replace('.', ''),
|
'view_count': int_or_none(meta.get('count')),
|
||||||
'view_count': int(info.find('PlayCount').text),
|
'upload_date': upload_date,
|
||||||
}
|
}
|
||||||
|
@ -1,15 +1,14 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals, division
|
||||||
|
|
||||||
import re
|
import math
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_chr
|
from ..compat import compat_chr
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
decode_png,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
encode_base_n,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
mimetype2ext,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -41,60 +40,6 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def openload_level2_debase(m):
|
|
||||||
radix, num = int(m.group(1)) + 27, int(m.group(2))
|
|
||||||
return '"' + encode_base_n(num, radix) + '"'
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def openload_level2(cls, txt):
|
|
||||||
# The function name is ǃ \u01c3
|
|
||||||
# Using escaped unicode literals does not work in Python 3.2
|
|
||||||
return re.sub(r'ǃ\((\d+),(\d+)\)', cls.openload_level2_debase, txt, re.UNICODE).replace('"+"', '')
|
|
||||||
|
|
||||||
# Openload uses a variant of aadecode
|
|
||||||
# openload_decode and related functions are originally written by
|
|
||||||
# vitas@matfyz.cz and released with public domain
|
|
||||||
# See https://github.com/rg3/youtube-dl/issues/8489
|
|
||||||
@classmethod
|
|
||||||
def openload_decode(cls, txt):
|
|
||||||
symbol_table = [
|
|
||||||
('_', '(゚Д゚) [゚Θ゚]'),
|
|
||||||
('a', '(゚Д゚) [゚ω゚ノ]'),
|
|
||||||
('b', '(゚Д゚) [゚Θ゚ノ]'),
|
|
||||||
('c', '(゚Д゚) [\'c\']'),
|
|
||||||
('d', '(゚Д゚) [゚ー゚ノ]'),
|
|
||||||
('e', '(゚Д゚) [゚Д゚ノ]'),
|
|
||||||
('f', '(゚Д゚) [1]'),
|
|
||||||
|
|
||||||
('o', '(゚Д゚) [\'o\']'),
|
|
||||||
('u', '(o゚ー゚o)'),
|
|
||||||
('c', '(゚Д゚) [\'c\']'),
|
|
||||||
|
|
||||||
('7', '((゚ー゚) + (o^_^o))'),
|
|
||||||
('6', '((o^_^o) +(o^_^o) +(c^_^o))'),
|
|
||||||
('5', '((゚ー゚) + (゚Θ゚))'),
|
|
||||||
('4', '(-~3)'),
|
|
||||||
('3', '(-~-~1)'),
|
|
||||||
('2', '(-~1)'),
|
|
||||||
('1', '(-~0)'),
|
|
||||||
('0', '((c^_^o)-(c^_^o))'),
|
|
||||||
]
|
|
||||||
delim = '(゚Д゚)[゚ε゚]+'
|
|
||||||
ret = ''
|
|
||||||
for aachar in txt.split(delim):
|
|
||||||
for val, pat in symbol_table:
|
|
||||||
aachar = aachar.replace(pat, val)
|
|
||||||
aachar = aachar.replace('+ ', '')
|
|
||||||
m = re.match(r'^\d+', aachar)
|
|
||||||
if m:
|
|
||||||
ret += compat_chr(int(m.group(0), 8))
|
|
||||||
else:
|
|
||||||
m = re.match(r'^u([\da-f]+)', aachar)
|
|
||||||
if m:
|
|
||||||
ret += compat_chr(int(m.group(1), 16))
|
|
||||||
return cls.openload_level2(ret)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
@ -102,29 +47,77 @@ class OpenloadIE(InfoExtractor):
|
|||||||
if 'File not found' in webpage:
|
if 'File not found' in webpage:
|
||||||
raise ExtractorError('File not found', expected=True)
|
raise ExtractorError('File not found', expected=True)
|
||||||
|
|
||||||
code = self._search_regex(
|
# The following extraction logic is proposed by @Belderak and @gdkchan
|
||||||
r'</video>\s*</div>\s*<script[^>]+>[^>]+</script>\s*<script[^>]+>([^<]+)</script>',
|
# and declared to be used freely in youtube-dl
|
||||||
webpage, 'JS code')
|
# See https://github.com/rg3/youtube-dl/issues/9706
|
||||||
|
|
||||||
decoded = self.openload_decode(code)
|
numbers_js = self._download_webpage(
|
||||||
|
'https://openload.co/assets/js/obfuscator/n.js', video_id,
|
||||||
|
note='Downloading signature numbers')
|
||||||
|
signums = self._search_regex(
|
||||||
|
r'window\.signatureNumbers\s*=\s*[\'"](?P<data>[a-z]+)[\'"]',
|
||||||
|
numbers_js, 'signature numbers', group='data')
|
||||||
|
|
||||||
video_url = self._search_regex(
|
linkimg_uri = self._search_regex(
|
||||||
r'return\s+"(https?://[^"]+)"', decoded, 'video URL')
|
r'<img[^>]+id="linkimg"[^>]+src="([^"]+)"', webpage, 'link image')
|
||||||
|
linkimg = self._request_webpage(
|
||||||
|
linkimg_uri, video_id, note=False).read()
|
||||||
|
|
||||||
|
width, height, pixels = decode_png(linkimg)
|
||||||
|
|
||||||
|
output = ''
|
||||||
|
for y in range(height):
|
||||||
|
for x in range(width):
|
||||||
|
r, g, b = pixels[y][3 * x:3 * x + 3]
|
||||||
|
if r == 0 and g == 0 and b == 0:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
output += compat_chr(r)
|
||||||
|
output += compat_chr(g)
|
||||||
|
output += compat_chr(b)
|
||||||
|
|
||||||
|
img_str_length = len(output) // 200
|
||||||
|
img_str = [[0 for x in range(img_str_length)] for y in range(10)]
|
||||||
|
|
||||||
|
sig_str_length = len(signums) // 260
|
||||||
|
sig_str = [[0 for x in range(sig_str_length)] for y in range(10)]
|
||||||
|
|
||||||
|
for i in range(10):
|
||||||
|
for j in range(img_str_length):
|
||||||
|
begin = i * img_str_length * 20 + j * 20
|
||||||
|
img_str[i][j] = output[begin:begin + 20]
|
||||||
|
for j in range(sig_str_length):
|
||||||
|
begin = i * sig_str_length * 26 + j * 26
|
||||||
|
sig_str[i][j] = signums[begin:begin + 26]
|
||||||
|
|
||||||
|
parts = []
|
||||||
|
# TODO: find better names for str_, chr_ and sum_
|
||||||
|
str_ = ''
|
||||||
|
for i in [2, 3, 5, 7]:
|
||||||
|
str_ = ''
|
||||||
|
sum_ = float(99)
|
||||||
|
for j in range(len(sig_str[i])):
|
||||||
|
for chr_idx in range(len(img_str[i][j])):
|
||||||
|
if sum_ > float(122):
|
||||||
|
sum_ = float(98)
|
||||||
|
chr_ = compat_chr(int(math.floor(sum_)))
|
||||||
|
if sig_str[i][j][chr_idx] == chr_ and j >= len(str_):
|
||||||
|
sum_ += float(2.5)
|
||||||
|
str_ += img_str[i][j][chr_idx]
|
||||||
|
parts.append(str_.replace(',', ''))
|
||||||
|
|
||||||
|
video_url = 'https://openload.co/stream/%s~%s~%s~%s' % (parts[3], parts[1], parts[2], parts[0])
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||||
'title', default=None) or self._html_search_meta(
|
'title', default=None) or self._html_search_meta(
|
||||||
'description', webpage, 'title', fatal=True)
|
'description', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
ext = mimetype2ext(self._search_regex(
|
|
||||||
r'window\.vt\s*=\s*(["\'])(?P<mimetype>.+?)\1', decoded,
|
|
||||||
'mimetype', default=None, group='mimetype')) or determine_ext(
|
|
||||||
video_url, 'mp4')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'ext': ext,
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
# Seems all videos have extensions in their titles
|
||||||
|
'ext': determine_ext(title),
|
||||||
}
|
}
|
||||||
|
58
youtube_dl/extractor/pokemon.py
Normal file
58
youtube_dl/extractor/pokemon.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PokemonIE(InfoExtractor):
    """Extractor for video pages on pokemon.com.

    The page is only scraped for the element carrying the ``data-video-*``
    attributes; the result is a ``url_transparent`` entry pointing at
    ``limelight:media:<id>`` with ``ie_key`` set to ``LimelightMedia``.
    """

    # Two URL shapes are accepted after the two-letter locale: a ``play=``
    # query value of 32 lowercase alphanumerics (group ``id``), or a path
    # segment of the form ``<digits>_<digits>-<slug>`` (group ``display_id``).
    _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/[^/]+/\d+_\d+-(?P<display_id>[^/?#]+))'
    _TESTS = [{
        'url': 'http://www.pokemon.com/us/pokemon-episodes/19_01-from-a-to-z/?play=true',
        'md5': '9fb209ae3a569aac25de0f5afc4ee08f',
        'info_dict': {
            'id': 'd0436c00c3ce4071ac6cee8130ac54a1',
            'ext': 'mp4',
            'title': 'From A to Z!',
            'description': 'Bonnie makes a new friend, Ash runs into an old friend, and a terrifying premonition begins to unfold!',
            'timestamp': 1460478136,
            'upload_date': '20160412',
        },
        # Key spelled 'add_ie', matching the other extractor tests.
        'add_ie': ['LimelightMedia'],
    }, {
        'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
        'only_matching': True,
    }, {
        'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_transparent result delegating to LimelightMedia."""
        # Exactly one of the two groups is set, depending on the URL shape.
        video_id, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, video_id or display_id)
        # When the URL did not name a video id, accept the first element
        # whose data-video-id looks like one.
        video_data = extract_attributes(self._search_regex(
            r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id or '[a-z0-9]{32}'),
            webpage, 'video data element'))
        # The id and title are mandatory; the remaining attributes are optional.
        video_id = video_data['data-video-id']
        title = video_data['data-video-title']
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': 'limelight:media:%s' % video_id,
            'title': title,
            'description': video_data.get('data-video-summary'),
            'thumbnail': video_data.get('data-video-poster'),
            'series': 'Pokémon',
            'season_number': int_or_none(video_data.get('data-video-season')),
            'episode': title,
            'episode_number': int_or_none(video_data.get('data-video-episode')),
            'ie_key': 'LimelightMedia',
        }
|
@ -1,55 +1,71 @@
|
|||||||
# encoding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
unified_timestamp,
|
||||||
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class RBMARadioIE(InfoExtractor):
    """Extractor for rbmaradio.com episode pages.

    Episode metadata is read from the ``__INITIAL_STATE__`` JSON embedded in
    the page, indexed by show id and episode id taken from the URL.
    """

    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011',
        'md5': '6bc6f9bcb18994b4c983bc3bf4384d95',
        'info_dict': {
            'id': 'ford-lopatin-live-at-primavera-sound-2011',
            'ext': 'mp3',
            'title': 'Main Stage - Ford & Lopatin',
            'description': 'md5:4f340fb48426423530af5a9d87bd7b91',
            # Raw literal: '\.' is not a valid escape in a plain string.
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2452,
            'timestamp': 1307103164,
            'upload_date': '20110603',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for one episode, with one format per bitrate."""
        mobj = re.match(self._VALID_URL, url)
        show_id = mobj.group('show_id')
        episode_id = mobj.group('id')

        webpage = self._download_webpage(url, episode_id)

        episode = self._parse_json(
            self._search_regex(
                r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>',
                webpage, 'json data'),
            episode_id)['episodes'][show_id][episode_id]

        title = episode['title']

        # Prefix the episode title with the show title when one is given.
        show_title = episode.get('showTitle')
        if show_title:
            title = '%s - %s' % (show_title, title)

        # The same audio URL is offered once per bitrate via the 'cbr'
        # query parameter.
        formats = [{
            'url': update_url_query(episode['audioURL'], query={'cbr': abr}),
            'format_id': compat_str(abr),
            'abr': abr,
            'vcodec': 'none',
        } for abr in (96, 128, 256)]

        description = clean_html(episode.get('longTeaser'))
        # 'imageURL' may be present but null; fall back to an empty mapping
        # so the lookup yields None instead of raising AttributeError.
        thumbnail = self._proto_relative_url(
            (episode.get('imageURL') or {}).get('landscape'))
        duration = int_or_none(episode.get('duration'))
        timestamp = unified_timestamp(episode.get('publishedAt'))

        return {
            'id': episode_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'formats': formats,
        }
|
||||||
|
50
youtube_dl/extractor/rozhlas.py
Normal file
50
youtube_dl/extractor/rozhlas.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
remove_start,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class RozhlasIE(InfoExtractor):
    """Extractor for audio pages on prehravac.rozhlas.cz.

    The player page is scraped for title, description and duration; the
    media URL is built directly from the numeric audio id.
    """

    _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://prehravac.rozhlas.cz/audio/3421320',
        'md5': '504c902dbc9e9a1fd50326eccf02a7e2',
        'info_dict': {
            'id': '3421320',
            'ext': 'mp3',
            'title': 'Echo Pavla Klusáka (30.06.2015 21:00)',
            'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let'
        }
    }, {
        # URL-matching check only, like the other info_dict-less tests.
        'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for a single audio item."""
        audio_id = self._match_id(url)

        # Always fetch the canonical player page, whatever suffix the
        # incoming URL carried after the id.
        webpage = self._download_webpage(
            'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id)

        # Prefer the <h3> heading preceding the player track element; fall
        # back to the og:title with its 'Radio Wave - ' prefix removed.
        title = self._html_search_regex(
            r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
            webpage, 'title', default=None) or remove_start(
            self._og_search_title(webpage), 'Radio Wave - ')
        # The description is taken from the title attribute of the <p>
        # preceding the player track element (the group is named 'url').
        description = self._html_search_regex(
            r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
            webpage, 'description', fatal=False, group='url')
        duration = int_or_none(self._search_regex(
            r'data-duration=["\'](\d+)', webpage, 'duration', default=None))

        return {
            'id': audio_id,
            'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id,
            'title': title,
            'description': description,
            'duration': duration,
            'vcodec': 'none',
        }
|
@ -14,10 +14,10 @@ from ..utils import ExtractorError
|
|||||||
class SohuIE(InfoExtractor):
|
class SohuIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
|
_VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
|
||||||
|
|
||||||
|
# Sohu videos give different MD5 sums on Travis CI and my machine
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'note': 'This video is available only in Mainland China',
|
'note': 'This video is available only in Mainland China',
|
||||||
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
|
||||||
'md5': '29175c8cadd8b5cc4055001e85d6b372',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '382479172',
|
'id': '382479172',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -26,7 +26,6 @@ class SohuIE(InfoExtractor):
|
|||||||
'skip': 'On available in China',
|
'skip': 'On available in China',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
|
||||||
'md5': '699060e75cf58858dd47fb9c03c42cfb',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '409385080',
|
'id': '409385080',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -34,7 +33,6 @@ class SohuIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
|
||||||
'md5': '9bf34be48f2f4dadcb226c74127e203c',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '78693464',
|
'id': '78693464',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -48,7 +46,6 @@ class SohuIE(InfoExtractor):
|
|||||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'md5': 'bdbfb8f39924725e6589c146bc1883ad',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '78910339_part1',
|
'id': '78910339_part1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -56,7 +53,6 @@ class SohuIE(InfoExtractor):
|
|||||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': '3e1f46aaeb95354fd10e7fca9fc1804e',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '78910339_part2',
|
'id': '78910339_part2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -64,7 +60,6 @@ class SohuIE(InfoExtractor):
|
|||||||
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'md5': '8407e634175fdac706766481b9443450',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '78910339_part3',
|
'id': '78910339_part3',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
34
youtube_dl/extractor/sonyliv.py
Normal file
34
youtube_dl/extractor/sonyliv.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class SonyLIVIE(InfoExtractor):
    """Extractor for sonyliv.com detail pages.

    The numeric id in the URL is used as a Brightcove video id and the
    extraction is handed over to the BrightcoveNew extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)'
    _TESTS = [{
        'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight",
        'info_dict': {
            'title': "Ep. 1 - Achaari Cheese Toast - Bachelor's Delight",
            'id': '5024612095001',
            'ext': 'mp4',
            'upload_date': '20160707',
            'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
            'uploader_id': '4338955589001',
            'timestamp': 1467870968,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': ['BrightcoveNew'],
    }, {
        'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
        'only_matching': True,
    }]

    # Player page URL; the video id is substituted for %s.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Return a url result pointing at the Brightcove player page."""
        video_id = self._match_id(url)
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % video_id
        return self.url_result(player_url, 'BrightcoveNew', video_id)
|
@ -118,8 +118,12 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
|||||||
xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
|
xpath_text(cfg_xml, './startThumb', 'thumbnail'), 'http:')
|
||||||
thumbnails = self._extract_thumbnails(cfg_xml)
|
thumbnails = self._extract_thumbnails(cfg_xml)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = None
|
||||||
self._TITLE_REGEX, webpage, 'title') if self._TITLE_REGEX else self._og_search_title(webpage)
|
if self._TITLE_REGEX:
|
||||||
|
title = self._html_search_regex(
|
||||||
|
self._TITLE_REGEX, webpage, 'title', default=None)
|
||||||
|
if not title:
|
||||||
|
title = self._og_search_title(webpage)
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage) or 18
|
age_limit = self._rta_search(webpage) or 18
|
||||||
|
|
||||||
@ -189,9 +193,9 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
|
|||||||
class TNAFlixIE(TNAFlixNetworkBaseIE):
|
class TNAFlixIE(TNAFlixNetworkBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?tnaflix\.com/[^/]+/(?P<display_id>[^/]+)/video(?P<id>\d+)'
|
||||||
|
|
||||||
_TITLE_REGEX = r'<title>(.+?) - TNAFlix Porn Videos</title>'
|
_TITLE_REGEX = r'<title>(.+?) - (?:TNAFlix Porn Videos|TNAFlix\.com)</title>'
|
||||||
_DESCRIPTION_REGEX = r'<meta[^>]+name="description"[^>]+content="([^"]+)"'
|
_DESCRIPTION_REGEX = r'(?s)>Description:</[^>]+>(.+?)<'
|
||||||
_UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h1>(.+?)</h1>'
|
_UPLOADER_REGEX = r'<i>\s*Verified Member\s*</i>\s*<h\d+>(.+?)<'
|
||||||
_CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
|
_CATEGORIES_REGEX = r'(?s)<span[^>]*>Categories:</span>(.+?)</div>'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -24,6 +24,7 @@ class TVPIE(InfoExtractor):
|
|||||||
'id': '194536',
|
'id': '194536',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Czas honoru, I seria – odc. 13',
|
'title': 'Czas honoru, I seria – odc. 13',
|
||||||
|
'description': 'md5:76649d2014f65c99477be17f23a4dead',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
|
'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176',
|
||||||
@ -32,6 +33,16 @@ class TVPIE(InfoExtractor):
|
|||||||
'id': '17916176',
|
'id': '17916176',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
|
'title': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
|
||||||
|
'description': 'TVP Gorzów pokaże filmy studentów z podroży dookoła świata',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# page id is not the same as video id(#7799)
|
||||||
|
'url': 'http://vod.tvp.pl/22704887/08122015-1500',
|
||||||
|
'md5': 'cf6a4705dfd1489aef8deb168d6ba742',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '22680786',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Wiadomości, 08.12.2015, 15:00',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
'url': 'http://vod.tvp.pl/seriale/obyczajowe/na-sygnale/sezon-2-27-/odc-39/17834272',
|
||||||
@ -53,6 +64,39 @@ class TVPIE(InfoExtractor):
|
|||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Resolve a page to its embedded object id and delegate to TVPEmbed.

    The id in the page URL is only used for logging the download; the
    actual video id is searched for in the page markup.
    """
    page_id = self._match_id(url)
    webpage = self._download_webpage(url, page_id)
    # Both patterns are raw literals so that \s and \d reach the regex
    # engine without relying on invalid string escapes.
    video_id = self._search_regex([
        r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
        r"object_id\s*:\s*'(\d+)'"], webpage, 'video id')
    return {
        '_type': 'url_transparent',
        'url': 'tvp:' + video_id,
        'description': self._og_search_description(webpage, default=None),
        'thumbnail': self._og_search_thumbnail(webpage),
        'ie_key': 'TVPEmbed',
    }
|
||||||
|
|
||||||
|
|
||||||
|
class TVPEmbedIE(InfoExtractor):
|
||||||
|
IE_NAME = 'tvp:embed'
|
||||||
|
IE_DESC = 'Telewizja Polska'
|
||||||
|
_VALID_URL = r'(?:tvp:|https?://[^/]+\.tvp\.(?:pl|info)/sess/tvplayer\.php\?.*?object_id=)(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.tvp.pl/sess/tvplayer.php?object_id=22670268',
|
||||||
|
'md5': '8c9cd59d16edabf39331f93bf8a766c7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '22670268',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Panorama, 07.12.2015, 15:40',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'tvp:22670268',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
@ -4,13 +4,18 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
qualities,
|
qualities,
|
||||||
determine_ext,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
int_or_none,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -34,6 +39,9 @@ class TVPlayIE(InfoExtractor):
|
|||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Kādi ir īri? - Viņas melo labāk',
|
'title': 'Kādi ir īri? - Viņas melo labāk',
|
||||||
'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
|
'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
|
||||||
|
'series': 'Viņas melo labāk',
|
||||||
|
'season': '2.sezona',
|
||||||
|
'season_number': 2,
|
||||||
'duration': 25,
|
'duration': 25,
|
||||||
'timestamp': 1406097056,
|
'timestamp': 1406097056,
|
||||||
'upload_date': '20140723',
|
'upload_date': '20140723',
|
||||||
@ -46,6 +54,10 @@ class TVPlayIE(InfoExtractor):
|
|||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Moterys meluoja geriau',
|
'title': 'Moterys meluoja geriau',
|
||||||
'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
|
'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
|
||||||
|
'series': 'Moterys meluoja geriau',
|
||||||
|
'episode_number': 47,
|
||||||
|
'season': '1 sezonas',
|
||||||
|
'season_number': 1,
|
||||||
'duration': 1330,
|
'duration': 1330,
|
||||||
'timestamp': 1403769181,
|
'timestamp': 1403769181,
|
||||||
'upload_date': '20140626',
|
'upload_date': '20140626',
|
||||||
@ -196,12 +208,15 @@ class TVPlayIE(InfoExtractor):
|
|||||||
|
|
||||||
title = video['title']
|
title = video['title']
|
||||||
|
|
||||||
if video.get('is_geo_blocked'):
|
try:
|
||||||
self.report_warning(
|
streams = self._download_json(
|
||||||
'This content might not be available in your country due to copyright reasons')
|
'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id,
|
||||||
|
video_id, 'Downloading streams JSON')
|
||||||
streams = self._download_json(
|
except ExtractorError as e:
|
||||||
'http://playapi.mtgx.tv/v1/videos/stream/%s' % video_id, video_id, 'Downloading streams JSON')
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
msg = self._parse_json(e.cause.read().decode('utf-8'), video_id)
|
||||||
|
raise ExtractorError(msg['msg'], expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
quality = qualities(['hls', 'medium', 'high'])
|
quality = qualities(['hls', 'medium', 'high'])
|
||||||
formats = []
|
formats = []
|
||||||
@ -226,7 +241,8 @@ class TVPlayIE(InfoExtractor):
|
|||||||
'ext': ext,
|
'ext': ext,
|
||||||
}
|
}
|
||||||
if video_url.startswith('rtmp'):
|
if video_url.startswith('rtmp'):
|
||||||
m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
|
m = re.search(
|
||||||
|
r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
|
||||||
if not m:
|
if not m:
|
||||||
continue
|
continue
|
||||||
fmt.update({
|
fmt.update({
|
||||||
@ -240,15 +256,41 @@ class TVPlayIE(InfoExtractor):
|
|||||||
'url': video_url,
|
'url': video_url,
|
||||||
})
|
})
|
||||||
formats.append(fmt)
|
formats.append(fmt)
|
||||||
|
|
||||||
|
if not formats and video.get('is_geo_blocked'):
|
||||||
|
self.raise_geo_restricted(
|
||||||
|
'This content might not be available in your country due to copyright reasons')
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
# TODO: webvtt in m3u8
|
||||||
|
subtitles = {}
|
||||||
|
sami_path = video.get('sami_path')
|
||||||
|
if sami_path:
|
||||||
|
lang = self._search_regex(
|
||||||
|
r'_([a-z]{2})\.xml', sami_path, 'lang',
|
||||||
|
default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1])
|
||||||
|
subtitles[lang] = [{
|
||||||
|
'url': sami_path,
|
||||||
|
}]
|
||||||
|
|
||||||
|
series = video.get('format_title')
|
||||||
|
episode_number = int_or_none(video.get('format_position', {}).get('episode'))
|
||||||
|
season = video.get('_embedded', {}).get('season', {}).get('title')
|
||||||
|
season_number = int_or_none(video.get('format_position', {}).get('season'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': video.get('description'),
|
'description': video.get('description'),
|
||||||
|
'series': series,
|
||||||
|
'episode_number': episode_number,
|
||||||
|
'season': season,
|
||||||
|
'season_number': season_number,
|
||||||
'duration': int_or_none(video.get('duration')),
|
'duration': int_or_none(video.get('duration')),
|
||||||
'timestamp': parse_iso8601(video.get('created_at')),
|
'timestamp': parse_iso8601(video.get('created_at')),
|
||||||
'view_count': int_or_none(video.get('views', {}).get('total')),
|
'view_count': int_or_none(video.get('views', {}).get('total')),
|
||||||
'age_limit': int_or_none(video.get('age_limit', 0)),
|
'age_limit': int_or_none(video.get('age_limit', 0)),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
58
youtube_dl/extractor/vodplatform.py
Normal file
58
youtube_dl/extractor/vodplatform.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import unescapeHTML
|
||||||
|
|
||||||
|
|
||||||
|
class VODPlatformIE(InfoExtractor):
    """Extractor for vod-platform.net embed pages.

    Stream locations are derived from hidden form inputs on the embed page:
    the directory of the HLS/DASH manifest is used as a base URL from which
    the HLS, DASH and SMIL manifests are requested.
    """

    _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/embed/(?P<id>[^/?#]+)'
    _TEST = {
        # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
        'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
        'md5': '1db2b7249ce383d6be96499006e951fc',
        'info_dict': {
            'id': 'RufMcytHDolTH1MuKHY9Fw',
            'ext': 'mp4',
            'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"',
        }
    }

    def _real_extract(self, url):
        """Collect HLS, DASH, RTMP and RTSP formats for one embed page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = unescapeHTML(self._og_search_title(webpage))
        hidden_inputs = self._hidden_inputs(webpage)

        # Everything up to and including the last slash before the manifest
        # file name. The dots are escaped so that only the literal file
        # names 'playlist.m3u8' / 'manifest.mpd' are accepted.
        base_url = self._search_regex(
            r'(.*/)(?:playlist\.m3u8|manifest\.mpd)',
            hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'],
            'base url')
        formats = self._extract_m3u8_formats(
            base_url + 'playlist.m3u8', video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False)
        formats.extend(self._extract_mpd_formats(
            base_url + 'manifest.mpd', video_id,
            mpd_id='dash', fatal=False))
        rtmp_formats = self._extract_smil_formats(
            base_url + 'jwplayer.smil', video_id, fatal=False)
        for rtmp_format in rtmp_formats:
            # Derive an RTSP variant from each RTMP entry: join the play
            # path onto the URL, switch the scheme and drop the RTMP-only
            # keys.
            rtsp_format = rtmp_format.copy()
            rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
            del rtsp_format['play_path']
            del rtsp_format['ext']
            rtsp_format.update({
                'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
                'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
                'protocol': 'rtsp',
            })
            formats.extend([rtmp_format, rtsp_format])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage),
            'formats': formats,
        }
|
@ -499,9 +499,20 @@ def parseOpts(overrideArguments=None):
|
|||||||
dest='bidi_workaround', action='store_true',
|
dest='bidi_workaround', action='store_true',
|
||||||
help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
|
||||||
workarounds.add_option(
|
workarounds.add_option(
|
||||||
'--sleep-interval', metavar='SECONDS',
|
'--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
|
||||||
dest='sleep_interval', type=float,
|
dest='sleep_interval', type=float,
|
||||||
help='Number of seconds to sleep before each download.')
|
help=(
|
||||||
|
'Number of seconds to sleep before each download when used alone '
|
||||||
|
'or a lower bound of a range for randomized sleep before each download '
|
||||||
|
'(minimum possible number of seconds to sleep) when used along with '
|
||||||
|
'--max-sleep-interval.'))
|
||||||
|
workarounds.add_option(
|
||||||
|
'--max-sleep-interval', metavar='SECONDS',
|
||||||
|
dest='max_sleep_interval', type=float,
|
||||||
|
help=(
|
||||||
|
'Upper bound of a range for randomized sleep before each download '
|
||||||
|
'(maximum possible number of seconds to sleep). Must only be used '
|
||||||
|
'along with --min-sleep-interval.'))
|
||||||
|
|
||||||
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
|
||||||
verbosity.add_option(
|
verbosity.add_option(
|
||||||
|
@ -3,11 +3,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import PostProcessor
|
from .common import PostProcessor
|
||||||
from ..utils import PostProcessingError
|
|
||||||
|
|
||||||
|
|
||||||
class MetadataFromTitlePPError(PostProcessingError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class MetadataFromTitlePP(PostProcessor):
|
class MetadataFromTitlePP(PostProcessor):
|
||||||
@ -38,7 +33,8 @@ class MetadataFromTitlePP(PostProcessor):
|
|||||||
title = info['title']
|
title = info['title']
|
||||||
match = re.match(self._titleregex, title)
|
match = re.match(self._titleregex, title)
|
||||||
if match is None:
|
if match is None:
|
||||||
raise MetadataFromTitlePPError('Could not interpret title of video as "%s"' % self._titleformat)
|
self._downloader.to_screen('[fromtitle] Could not interpret title of video as "%s"' % self._titleformat)
|
||||||
|
return [], info
|
||||||
for attribute, value in match.groupdict().items():
|
for attribute, value in match.groupdict().items():
|
||||||
value = match.group(attribute)
|
value = match.group(attribute)
|
||||||
info[attribute] = value
|
info[attribute] = value
|
||||||
|
@ -47,6 +47,7 @@ from .compat import (
|
|||||||
compat_socket_create_connection,
|
compat_socket_create_connection,
|
||||||
compat_str,
|
compat_str,
|
||||||
compat_struct_pack,
|
compat_struct_pack,
|
||||||
|
compat_struct_unpack,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse,
|
compat_urllib_parse,
|
||||||
compat_urllib_parse_urlencode,
|
compat_urllib_parse_urlencode,
|
||||||
@ -1101,7 +1102,7 @@ def unified_timestamp(date_str, day_first=True):
|
|||||||
|
|
||||||
date_str = date_str.replace(',', ' ')
|
date_str = date_str.replace(',', ' ')
|
||||||
|
|
||||||
pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0)
|
pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
|
||||||
timezone, date_str = extract_timezone(date_str)
|
timezone, date_str = extract_timezone(date_str)
|
||||||
|
|
||||||
# Remove AM/PM + timezone
|
# Remove AM/PM + timezone
|
||||||
@ -1109,13 +1110,13 @@ def unified_timestamp(date_str, day_first=True):
|
|||||||
|
|
||||||
for expression in date_formats(day_first):
|
for expression in date_formats(day_first):
|
||||||
try:
|
try:
|
||||||
dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta
|
dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
|
||||||
return calendar.timegm(dt.timetuple())
|
return calendar.timegm(dt.timetuple())
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
timetuple = email.utils.parsedate_tz(date_str)
|
timetuple = email.utils.parsedate_tz(date_str)
|
||||||
if timetuple:
|
if timetuple:
|
||||||
return calendar.timegm(timetuple.timetuple())
|
return calendar.timegm(timetuple) + pm_delta * 3600
|
||||||
|
|
||||||
|
|
||||||
def determine_ext(url, default_ext='unknown_video'):
|
def determine_ext(url, default_ext='unknown_video'):
|
||||||
@ -1983,11 +1984,27 @@ US_RATINGS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Rating label -> age limit, consulted by parse_age_limit() after US_RATINGS.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}


def parse_age_limit(s):
    """Convert an age-limit value to an int, or None if not recognised.

    Accepted inputs:
      * an int in the range 0..21, returned unchanged (other ints give None);
      * a string of one or two digits with an optional trailing '+';
      * a key of US_RATINGS or TV_PARENTAL_GUIDELINES.
    Anything that is neither an int nor a string yields None.
    """
    # Exact type comparison: subclasses of int (e.g. bool) do not take
    # this branch.
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None
    age_match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if age_match is not None:
        return int(age_match.group('age'))
    # US_RATINGS takes precedence over the TV guidelines table.
    for ratings in (US_RATINGS, TV_PARENTAL_GUIDELINES):
        if s in ratings:
            return ratings[s]
    return None
|
||||||
|
|
||||||
|
|
||||||
def strip_jsonp(code):
|
def strip_jsonp(code):
|
||||||
@ -2971,3 +2988,110 @@ def parse_m3u8_attributes(attrib):
|
|||||||
|
|
||||||
def urshift(val, n):
|
def urshift(val, n):
|
||||||
return val >> n if val >= 0 else (val + 0x100000000) >> n
|
return val >> n if val >= 0 else (val + 0x100000000) >> n
|
||||||
|
|
||||||
|
|
||||||
|
# Based on png2str() written by @gdkchan and improved by @yokrysty
|
||||||
|
# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
|
||||||
|
def decode_png(png_data):
    """Decode an 8-bit truecolour (RGB), non-interlaced PNG image.

    Reference: https://www.w3.org/TR/PNG/

    png_data is the raw byte string of a complete PNG file.

    Returns a (width, height, pixels) tuple. pixels is a list of height
    rows; every row is a flat list of width * 3 integers in 0..255 holding
    the unfiltered R, G, B samples of that scanline from left to right.

    Raises IOError when the data does not start with a PNG signature and an
    IHDR chunk, when the IHDR chunk is too short, when the image is not
    8-bit non-interlaced RGB (the only layout this decoder understands),
    when there is no IDAT data, when the decompressed data is shorter than
    the image dimensions require, or when a scanline uses an undefined
    filter type. Errors raised by zlib.decompress() are propagated as is.
    """
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or png_data[12:16] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    def unpack_uint32(raw):
        return compat_struct_unpack('>I', raw)[0]

    # Every chunk is laid out as: 4-byte big-endian data length,
    # 4-byte type, <length> bytes of data, 4-byte CRC (not verified here).
    chunks = []
    total = len(png_data)
    pos = 8
    while pos < total:
        if pos + 8 > total:
            # Trailing bytes too short to form a chunk header: ignore them
            # instead of misreading them as a 1- or 2-byte length.
            break
        length = unpack_uint32(png_data[pos:pos + 4])
        chunk_type = png_data[pos + 4:pos + 8]
        data_start = pos + 8
        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': png_data[data_start:data_start + length],
        })
        pos = data_start + length + 4  # Skip CRC

    # The signature check above guarantees that the first chunk is IHDR
    ihdr = chunks[0]['data']
    if len(ihdr) < 13:
        raise IOError('Not a valid PNG file.')

    width = unpack_uint32(ihdr[:4])
    height = unpack_uint32(ihdr[4:8])

    # IHDR bytes 8..12: bit depth, colour type, compression method,
    # filter method, interlace method
    image_format = bytearray(ihdr[8:13])
    if (image_format[0], image_format[1], image_format[4]) != (8, 2, 0):
        raise IOError(
            'Unsupported PNG format (only 8-bit non-interlaced RGB is supported).')

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    bytes_per_pixel = 3
    stride = width * bytes_per_pixel

    # Each scanline is preceded by one filter type byte
    if len(decompressed_data) < height * (1 + stride):
        raise IOError('Unable to read PNG data.')

    pixels = []
    # The scanline above the first one is treated as all zeros
    previous_row = [0] * stride

    for y in range(height):
        base_pos = y * (1 + stride)
        filter_type = decompressed_data[base_pos]
        if filter_type > 4:
            raise IOError('Unable to read PNG data.')

        scanline = decompressed_data[base_pos + 1:base_pos + 1 + stride]
        current_row = []

        for x, color in enumerate(scanline):
            # Filters work on the corresponding sample of the neighbouring
            # pixels; neighbours outside the image count as zero.
            has_left = x >= bytes_per_pixel
            left = current_row[x - bytes_per_pixel] if has_left else 0
            up = previous_row[x]

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = previous_row[x - bytes_per_pixel] if has_left else 0

                p = left + up - upper_left
                pa = abs(p - left)
                pb = abs(p - up)
                pc = abs(p - upper_left)

                if pa <= pb and pa <= pc:
                    color = (color + left) & 0xff
                elif pb <= pc:
                    color = (color + up) & 0xff
                else:
                    color = (color + upper_left) & 0xff

            current_row.append(color)

        pixels.append(current_row)
        previous_row = current_row

    return width, height, pixels
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2016.08.01'
|
__version__ = '2016.08.07'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user