Merge branch 'master' of https://github.com/ytdl-org/youtube-dl
This commit is contained in:
commit
05899a47d4
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
6
.github/ISSUE_TEMPLATE/1_broken_site.md
vendored
@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support
|
- [ ] I'm reporting a broken site support
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.03.08**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar issues including closed ones
|
- [ ] I've searched the bugtracker for similar issues including closed ones
|
||||||
@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2020.01.24
|
[debug] youtube-dl version 2020.03.08
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
@ -19,7 +19,7 @@ labels: 'site-support-request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
|
||||||
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a new site support request
|
- [ ] I'm reporting a new site support request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.03.08**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that none of provided URLs violate any copyrights
|
- [ ] I've checked that none of provided URLs violate any copyrights
|
||||||
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
- [ ] I've searched the bugtracker for similar site support requests including closed ones
|
||||||
|
@ -18,13 +18,13 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a site feature request
|
- [ ] I'm reporting a site feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.03.08**
|
||||||
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
6
.github/ISSUE_TEMPLATE/4_bug_report.md
vendored
@ -18,7 +18,7 @@ title: ''
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
|
||||||
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
|
||||||
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
|
|||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a broken site support issue
|
- [ ] I'm reporting a broken site support issue
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.03.08**
|
||||||
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
- [ ] I've checked that all provided URLs are alive and playable in a browser
|
||||||
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
|
||||||
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
- [ ] I've searched the bugtracker for similar bug reports including closed ones
|
||||||
@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2020.01.24
|
[debug] youtube-dl version 2020.03.08
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
4
.github/ISSUE_TEMPLATE/5_feature_request.md
vendored
@ -19,13 +19,13 @@ labels: 'request'
|
|||||||
|
|
||||||
<!--
|
<!--
|
||||||
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
|
||||||
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.01.24. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.03.08. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
|
||||||
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
|
||||||
- Finally, put x into all relevant boxes (like this [x])
|
- Finally, put x into all relevant boxes (like this [x])
|
||||||
-->
|
-->
|
||||||
|
|
||||||
- [ ] I'm reporting a feature request
|
- [ ] I'm reporting a feature request
|
||||||
- [ ] I've verified that I'm running youtube-dl version **2020.01.24**
|
- [ ] I've verified that I'm running youtube-dl version **2020.03.08**
|
||||||
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
- [ ] I've searched the bugtracker for similar feature requests including closed ones
|
||||||
|
|
||||||
|
|
||||||
|
78
ChangeLog
78
ChangeLog
@ -1,3 +1,81 @@
|
|||||||
|
version 2020.03.08
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [utils] Add support for cookie files with spaces
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [pornhub] Add support for pornhubpremium.com (#24288)
|
||||||
|
- [youtube] Remove outdated code and unnecessary requests
|
||||||
|
* [youtube] Improve extraction in 429 HTTP error conditions (#24283)
|
||||||
|
* [nhk] Update API version (#24270)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.03.06
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [youtube] Fix age-gated videos support without login (#24248)
|
||||||
|
* [vimeo] Fix showcase password protected video extraction (#24224)
|
||||||
|
* [pornhub] Improve title extraction (#24184)
|
||||||
|
* [peertube] Improve extraction (#23657)
|
||||||
|
+ [servus] Add support for new URL schema (#23475, #23583, #24142)
|
||||||
|
* [vimeo] Fix subtitles URLs (#24209)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.03.01
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Force redirect URL to unicode on python 2
|
||||||
|
- [options] Remove duplicate short option -v for --version (#24162)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [xhamster] Fix extraction (#24205)
|
||||||
|
* [franceculture] Fix extraction (#24204)
|
||||||
|
+ [telecinco] Add support for article opening videos
|
||||||
|
* [telecinco] Fix extraction (#24195)
|
||||||
|
* [xtube] Fix metadata extraction (#21073, #22455)
|
||||||
|
* [youjizz] Fix extraction (#24181)
|
||||||
|
- Remove no longer needed compat_str around geturl
|
||||||
|
* [pornhd] Fix extraction (#24128)
|
||||||
|
+ [teachable] Add support for multiple videos per lecture (#24101)
|
||||||
|
+ [wistia] Add support for multiple generic embeds (#8347, #11385)
|
||||||
|
* [imdb] Fix extraction (#23443)
|
||||||
|
* [tv2dk:bornholm:play] Fix extraction (#24076)
|
||||||
|
|
||||||
|
|
||||||
|
version 2020.02.16
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591,
|
||||||
|
#10622)
|
||||||
|
* [update] Fix updating via symlinks (#23991)
|
||||||
|
+ [compat] Introduce compat_realpath (#23991)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [npr] Add support for streams (#24042)
|
||||||
|
+ [24video] Add support for porn.24video.net (#23779, #23784)
|
||||||
|
- [jpopsuki] Remove extractor (#23858)
|
||||||
|
* [nova] Improve extraction (#23690)
|
||||||
|
* [nova:embed] Improve (#23690)
|
||||||
|
* [nova:embed] Fix extraction (#23672)
|
||||||
|
+ [abc:iview] Add support for 720p (#22907, #22921)
|
||||||
|
* [nytimes] Improve format sorting (#24010)
|
||||||
|
+ [toggle] Add support for mewatch.sg (#23895, #23930)
|
||||||
|
* [thisoldhouse] Fix extraction (#23951)
|
||||||
|
+ [popcorntimes] Add support for popcorntimes.tv (#23949)
|
||||||
|
* [sportdeutschland] Update to new API
|
||||||
|
* [twitch:stream] Lowercase channel id for stream request (#23917)
|
||||||
|
* [tv5mondeplus] Fix extraction (#23907, #23911)
|
||||||
|
* [tva] Relax URL regular expression (#23903)
|
||||||
|
* [vimeo] Fix album extraction (#23864)
|
||||||
|
* [viewlift] Improve extraction
|
||||||
|
* Fix extraction (#23851)
|
||||||
|
+ Add support for authentication
|
||||||
|
+ Add support for more domains
|
||||||
|
* [svt] Fix series extraction (#22297)
|
||||||
|
* [svt] Fix article extraction (#22897, #22919)
|
||||||
|
* [soundcloud] Improve private playlist/set tracks extraction (#3707)
|
||||||
|
|
||||||
|
|
||||||
version 2020.01.24
|
version 2020.01.24
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
@ -835,7 +835,9 @@ In February 2015, the new YouTube player contained a character sequence in a str
|
|||||||
|
|
||||||
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
### HTTP Error 429: Too Many Requests or 402: Payment Required
|
||||||
|
|
||||||
These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
These two error codes indicate that the service is blocking your IP address because of overuse. Usually this is a soft block, meaning that you can regain access after solving a CAPTCHA. Just open a browser, solve the CAPTCHA the service presents, and after that [pass cookies](#how-do-i-pass-cookies-to-youtube-dl) to youtube-dl. Note that if your machine has multiple external IPs then you should also pass exactly the same IP you've used for solving the CAPTCHA with [`--source-address`](#network-options). Also you may need to pass the `User-Agent` HTTP header of your browser with [`--user-agent`](#workarounds).
|
||||||
|
|
||||||
|
If this is not the case (the service does not offer a CAPTCHA to solve) then you can contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
|
||||||
|
|
||||||
### SyntaxError: Non-ASCII character
|
### SyntaxError: Non-ASCII character
|
||||||
|
|
||||||
|
@ -389,7 +389,6 @@
|
|||||||
- **JeuxVideo**
|
- **JeuxVideo**
|
||||||
- **Joj**
|
- **Joj**
|
||||||
- **Jove**
|
- **Jove**
|
||||||
- **jpopsuki.tv**
|
|
||||||
- **JWPlatform**
|
- **JWPlatform**
|
||||||
- **Kakao**
|
- **Kakao**
|
||||||
- **Kaltura**
|
- **Kaltura**
|
||||||
@ -663,6 +662,7 @@
|
|||||||
- **Pokemon**
|
- **Pokemon**
|
||||||
- **PolskieRadio**
|
- **PolskieRadio**
|
||||||
- **PolskieRadioCategory**
|
- **PolskieRadioCategory**
|
||||||
|
- **Popcorntimes**
|
||||||
- **PopcornTV**
|
- **PopcornTV**
|
||||||
- **PornCom**
|
- **PornCom**
|
||||||
- **PornerBros**
|
- **PornerBros**
|
||||||
@ -1004,8 +1004,8 @@
|
|||||||
- **Vidzi**
|
- **Vidzi**
|
||||||
- **vier**: vier.be and vijf.be
|
- **vier**: vier.be and vijf.be
|
||||||
- **vier:videos**
|
- **vier:videos**
|
||||||
- **ViewLift**
|
- **viewlift**
|
||||||
- **ViewLiftEmbed**
|
- **viewlift:embed**
|
||||||
- **Viidea**
|
- **Viidea**
|
||||||
- **viki**
|
- **viki**
|
||||||
- **viki:channel**
|
- **viki:channel**
|
||||||
|
@ -816,11 +816,15 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
'webpage_url': 'http://example.com',
|
'webpage_url': 'http://example.com',
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_ids(params):
|
def get_downloaded_info_dicts(params):
|
||||||
ydl = YDL(params)
|
ydl = YDL(params)
|
||||||
# make a copy because the dictionary can be modified
|
# make a deep copy because the dictionary and nested entries
|
||||||
ydl.process_ie_result(playlist.copy())
|
# can be modified
|
||||||
return [int(v['id']) for v in ydl.downloaded_info_dicts]
|
ydl.process_ie_result(copy.deepcopy(playlist))
|
||||||
|
return ydl.downloaded_info_dicts
|
||||||
|
|
||||||
|
def get_ids(params):
|
||||||
|
return [int(v['id']) for v in get_downloaded_info_dicts(params)]
|
||||||
|
|
||||||
result = get_ids({})
|
result = get_ids({})
|
||||||
self.assertEqual(result, [1, 2, 3, 4])
|
self.assertEqual(result, [1, 2, 3, 4])
|
||||||
@ -852,6 +856,22 @@ class TestYoutubeDL(unittest.TestCase):
|
|||||||
result = get_ids({'playlist_items': '2-4,3-4,3'})
|
result = get_ids({'playlist_items': '2-4,3-4,3'})
|
||||||
self.assertEqual(result, [2, 3, 4])
|
self.assertEqual(result, [2, 3, 4])
|
||||||
|
|
||||||
|
# Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
|
||||||
|
# @{
|
||||||
|
result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
|
||||||
|
self.assertEqual(result[0]['playlist_index'], 2)
|
||||||
|
self.assertEqual(result[1]['playlist_index'], 3)
|
||||||
|
|
||||||
|
result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
|
||||||
|
self.assertEqual(result[0]['playlist_index'], 2)
|
||||||
|
self.assertEqual(result[1]['playlist_index'], 3)
|
||||||
|
self.assertEqual(result[2]['playlist_index'], 4)
|
||||||
|
|
||||||
|
result = get_downloaded_info_dicts({'playlist_items': '4,2'})
|
||||||
|
self.assertEqual(result[0]['playlist_index'], 4)
|
||||||
|
self.assertEqual(result[1]['playlist_index'], 2)
|
||||||
|
# @}
|
||||||
|
|
||||||
def test_urlopen_no_file_protocol(self):
|
def test_urlopen_no_file_protocol(self):
|
||||||
# see https://github.com/ytdl-org/youtube-dl/issues/8227
|
# see https://github.com/ytdl-org/youtube-dl/issues/8227
|
||||||
ydl = YDL()
|
ydl = YDL()
|
||||||
|
@ -14,6 +14,9 @@ from youtube_dl.utils import YoutubeDLCookieJar
|
|||||||
|
|
||||||
|
|
||||||
class TestYoutubeDLCookieJar(unittest.TestCase):
|
class TestYoutubeDLCookieJar(unittest.TestCase):
|
||||||
|
def __assert_cookie_has_value(self, cookiejar, key):
|
||||||
|
self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
|
||||||
|
|
||||||
def test_keep_session_cookies(self):
|
def test_keep_session_cookies(self):
|
||||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt')
|
||||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
@ -32,12 +35,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
|
|||||||
def test_strip_httponly_prefix(self):
|
def test_strip_httponly_prefix(self):
|
||||||
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
|
||||||
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
|
self.__assert_cookie_has_value(cookiejar, 'HTTPONLY_COOKIE')
|
||||||
|
self.__assert_cookie_has_value(cookiejar, 'JS_ACCESSIBLE_COOKIE')
|
||||||
|
|
||||||
def assert_cookie_has_value(key):
|
def test_convert_spaces_to_tabs(self):
|
||||||
self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE')
|
cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/cookie_file_with_spaces.txt')
|
||||||
|
cookiejar.load(ignore_discard=True, ignore_expires=True)
|
||||||
assert_cookie_has_value('HTTPONLY_COOKIE')
|
self.__assert_cookie_has_value(cookiejar, 'COOKIE')
|
||||||
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -26,7 +26,6 @@ from youtube_dl.extractor import (
|
|||||||
ThePlatformIE,
|
ThePlatformIE,
|
||||||
ThePlatformFeedIE,
|
ThePlatformFeedIE,
|
||||||
RTVEALaCartaIE,
|
RTVEALaCartaIE,
|
||||||
FunnyOrDieIE,
|
|
||||||
DemocracynowIE,
|
DemocracynowIE,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
|
|||||||
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
|
||||||
|
|
||||||
|
|
||||||
class TestFunnyOrDieSubtitles(BaseTestSubtitles):
|
|
||||||
url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
|
|
||||||
IE = FunnyOrDieIE
|
|
||||||
|
|
||||||
def test_allsubtitles(self):
|
|
||||||
self.DL.params['writesubtitles'] = True
|
|
||||||
self.DL.params['allsubtitles'] = True
|
|
||||||
subtitles = self.getSubtitles()
|
|
||||||
self.assertEqual(set(subtitles.keys()), set(['en']))
|
|
||||||
self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
|
|
||||||
|
|
||||||
|
|
||||||
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
class TestDemocracynowSubtitles(BaseTestSubtitles):
|
||||||
url = 'http://www.democracynow.org/shows/2015/7/3'
|
url = 'http://www.democracynow.org/shows/2015/7/3'
|
||||||
IE = DemocracynowIE
|
IE = DemocracynowIE
|
||||||
|
5
test/testdata/cookies/cookie_file_with_spaces.txt
vendored
Normal file
5
test/testdata/cookies/cookie_file_with_spaces.txt
vendored
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# Netscape HTTP Cookie File
|
||||||
|
# http://curl.haxx.se/rfc/cookie_spec.html
|
||||||
|
# This is a generated file! Do not edit.
|
||||||
|
|
||||||
|
www.foobar.foobar FALSE / TRUE 2147483647 COOKIE COOKIE_VALUE
|
@ -92,6 +92,7 @@ from .utils import (
|
|||||||
YoutubeDLCookieJar,
|
YoutubeDLCookieJar,
|
||||||
YoutubeDLCookieProcessor,
|
YoutubeDLCookieProcessor,
|
||||||
YoutubeDLHandler,
|
YoutubeDLHandler,
|
||||||
|
YoutubeDLRedirectHandler,
|
||||||
)
|
)
|
||||||
from .cache import Cache
|
from .cache import Cache
|
||||||
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
|
||||||
@ -990,7 +991,7 @@ class YoutubeDL(object):
|
|||||||
'playlist_title': ie_result.get('title'),
|
'playlist_title': ie_result.get('title'),
|
||||||
'playlist_uploader': ie_result.get('uploader'),
|
'playlist_uploader': ie_result.get('uploader'),
|
||||||
'playlist_uploader_id': ie_result.get('uploader_id'),
|
'playlist_uploader_id': ie_result.get('uploader_id'),
|
||||||
'playlist_index': i + playliststart,
|
'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
|
||||||
'extractor': ie_result['extractor'],
|
'extractor': ie_result['extractor'],
|
||||||
'webpage_url': ie_result['webpage_url'],
|
'webpage_url': ie_result['webpage_url'],
|
||||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||||
@ -2343,6 +2344,7 @@ class YoutubeDL(object):
|
|||||||
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
debuglevel = 1 if self.params.get('debug_printtraffic') else 0
|
||||||
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
|
||||||
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
|
||||||
|
redirect_handler = YoutubeDLRedirectHandler()
|
||||||
data_handler = compat_urllib_request_DataHandler()
|
data_handler = compat_urllib_request_DataHandler()
|
||||||
|
|
||||||
# When passing our own FileHandler instance, build_opener won't add the
|
# When passing our own FileHandler instance, build_opener won't add the
|
||||||
@ -2356,7 +2358,7 @@ class YoutubeDL(object):
|
|||||||
file_handler.file_open = file_open
|
file_handler.file_open = file_open
|
||||||
|
|
||||||
opener = compat_urllib_request.build_opener(
|
opener = compat_urllib_request.build_opener(
|
||||||
proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
|
proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
|
||||||
|
|
||||||
# Delete the default user-agent header, which would otherwise apply in
|
# Delete the default user-agent header, which would otherwise apply in
|
||||||
# cases where our custom HTTP handler doesn't come into play
|
# cases where our custom HTTP handler doesn't come into play
|
||||||
|
@ -2754,6 +2754,17 @@ else:
|
|||||||
compat_expanduser = os.path.expanduser
|
compat_expanduser = os.path.expanduser
|
||||||
|
|
||||||
|
|
||||||
|
if compat_os_name == 'nt' and sys.version_info < (3, 8):
|
||||||
|
# os.path.realpath on Windows does not follow symbolic links
|
||||||
|
# prior to Python 3.8 (see https://bugs.python.org/issue9949)
|
||||||
|
def compat_realpath(path):
|
||||||
|
while os.path.islink(path):
|
||||||
|
path = os.path.abspath(os.readlink(path))
|
||||||
|
return path
|
||||||
|
else:
|
||||||
|
compat_realpath = os.path.realpath
|
||||||
|
|
||||||
|
|
||||||
if sys.version_info < (3, 0):
|
if sys.version_info < (3, 0):
|
||||||
def compat_print(s):
|
def compat_print(s):
|
||||||
from .utils import preferredencoding
|
from .utils import preferredencoding
|
||||||
@ -2998,6 +3009,7 @@ __all__ = [
|
|||||||
'compat_os_name',
|
'compat_os_name',
|
||||||
'compat_parse_qs',
|
'compat_parse_qs',
|
||||||
'compat_print',
|
'compat_print',
|
||||||
|
'compat_realpath',
|
||||||
'compat_setenv',
|
'compat_setenv',
|
||||||
'compat_shlex_quote',
|
'compat_shlex_quote',
|
||||||
'compat_shlex_split',
|
'compat_shlex_split',
|
||||||
|
@ -110,17 +110,17 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
|
|
||||||
# ABC iview programs are normally available for 14 days only.
|
# ABC iview programs are normally available for 14 days only.
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
|
'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
|
||||||
'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
|
'md5': '67715ce3c78426b11ba167d875ac6abf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'ZX9371A050S00',
|
'id': 'LE1927H001S00',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Gaston's Birthday",
|
'title': "Series 11 Ep 1",
|
||||||
'series': "Ben And Holly's Little Kingdom",
|
'series': "Gruen",
|
||||||
'description': 'md5:f9de914d02f226968f598ac76f105bcf',
|
'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
|
||||||
'upload_date': '20180604',
|
'upload_date': '20190925',
|
||||||
'uploader_id': 'abc4kids',
|
'uploader_id': 'abc1',
|
||||||
'timestamp': 1528140219,
|
'timestamp': 1569445289,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -148,7 +148,7 @@ class ABCIViewIE(InfoExtractor):
|
|||||||
'hdnea': token,
|
'hdnea': token,
|
||||||
})
|
})
|
||||||
|
|
||||||
for sd in ('sd', 'sd-low'):
|
for sd in ('720', 'sd', 'sd-low'):
|
||||||
sd_url = try_get(
|
sd_url = try_get(
|
||||||
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
stream, lambda x: x['streams']['hls'][sd], compat_str)
|
||||||
if not sd_url:
|
if not sd_url:
|
||||||
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
encode_base_n,
|
encode_base_n,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -55,7 +54,7 @@ class EpornerIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
webpage, urlh = self._download_webpage_handle(url, display_id)
|
||||||
|
|
||||||
video_id = self._match_id(compat_str(urlh.geturl()))
|
video_id = self._match_id(urlh.geturl())
|
||||||
|
|
||||||
hash = self._search_regex(
|
hash = self._search_regex(
|
||||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||||
|
@ -497,7 +497,6 @@ from .jeuxvideo import JeuxVideoIE
|
|||||||
from .jove import JoveIE
|
from .jove import JoveIE
|
||||||
from .joj import JojIE
|
from .joj import JojIE
|
||||||
from .jwplatform import JWPlatformIE
|
from .jwplatform import JWPlatformIE
|
||||||
from .jpopsukitv import JpopsukiIE
|
|
||||||
from .kakao import KakaoIE
|
from .kakao import KakaoIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from .kanalplay import KanalPlayIE
|
from .kanalplay import KanalPlayIE
|
||||||
@ -850,6 +849,7 @@ from .polskieradio import (
|
|||||||
PolskieRadioIE,
|
PolskieRadioIE,
|
||||||
PolskieRadioCategoryIE,
|
PolskieRadioCategoryIE,
|
||||||
)
|
)
|
||||||
|
from .popcorntimes import PopcorntimesIE
|
||||||
from .popcorntv import PopcornTVIE
|
from .popcorntv import PopcornTVIE
|
||||||
from .porn91 import Porn91IE
|
from .porn91 import Porn91IE
|
||||||
from .porncom import PornComIE
|
from .porncom import PornComIE
|
||||||
|
@ -31,7 +31,13 @@ class FranceCultureIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
video_data = extract_attributes(self._search_regex(
|
video_data = extract_attributes(self._search_regex(
|
||||||
r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
|
r'''(?sx)
|
||||||
|
(?:
|
||||||
|
</h1>|
|
||||||
|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
||||||
|
).*?
|
||||||
|
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
|
||||||
|
''',
|
||||||
webpage, 'video data'))
|
webpage, 'video data'))
|
||||||
|
|
||||||
video_url = video_data['data-asset-source']
|
video_url = video_data['data-asset-source']
|
||||||
|
@ -2287,7 +2287,7 @@ class GenericIE(InfoExtractor):
|
|||||||
|
|
||||||
if head_response is not False:
|
if head_response is not False:
|
||||||
# Check for redirect
|
# Check for redirect
|
||||||
new_url = compat_str(head_response.geturl())
|
new_url = head_response.geturl()
|
||||||
if url != new_url:
|
if url != new_url:
|
||||||
self.report_following_redirect(new_url)
|
self.report_following_redirect(new_url)
|
||||||
if force_videoid:
|
if force_videoid:
|
||||||
@ -2387,12 +2387,12 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
self._parse_xspf(
|
self._parse_xspf(
|
||||||
doc, video_id, xspf_url=url,
|
doc, video_id, xspf_url=url,
|
||||||
xspf_base_url=compat_str(full_response.geturl())),
|
xspf_base_url=full_response.geturl()),
|
||||||
video_id)
|
video_id)
|
||||||
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
|
||||||
info_dict['formats'] = self._parse_mpd_formats(
|
info_dict['formats'] = self._parse_mpd_formats(
|
||||||
doc,
|
doc,
|
||||||
mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
|
mpd_base_url=full_response.geturl().rpartition('/')[0],
|
||||||
mpd_url=url)
|
mpd_url=url)
|
||||||
self._sort_formats(info_dict['formats'])
|
self._sort_formats(info_dict['formats'])
|
||||||
return info_dict
|
return info_dict
|
||||||
@ -2537,14 +2537,15 @@ class GenericIE(InfoExtractor):
|
|||||||
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
|
dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded Wistia player
|
# Look for embedded Wistia player
|
||||||
wistia_url = WistiaIE._extract_url(webpage)
|
wistia_urls = WistiaIE._extract_urls(webpage)
|
||||||
if wistia_url:
|
if wistia_urls:
|
||||||
return {
|
playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
|
||||||
'_type': 'url_transparent',
|
for entry in playlist['entries']:
|
||||||
'url': self._proto_relative_url(wistia_url),
|
entry.update({
|
||||||
'ie_key': WistiaIE.ie_key(),
|
'_type': 'url_transparent',
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
}
|
})
|
||||||
|
return playlist
|
||||||
|
|
||||||
# Look for SVT player
|
# Look for SVT player
|
||||||
svt_url = SVTIE._extract_url(webpage)
|
svt_url = SVTIE._extract_url(webpage)
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -8,6 +10,7 @@ from ..utils import (
|
|||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
qualities,
|
qualities,
|
||||||
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -15,15 +18,16 @@ from ..utils import (
|
|||||||
class ImdbIE(InfoExtractor):
|
class ImdbIE(InfoExtractor):
|
||||||
IE_NAME = 'imdb'
|
IE_NAME = 'imdb'
|
||||||
IE_DESC = 'Internet Movie Database trailers'
|
IE_DESC = 'Internet Movie Database trailers'
|
||||||
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
'url': 'http://www.imdb.com/video/imdb/vi2524815897',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2524815897',
|
'id': '2524815897',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'No. 2 from Ice Age: Continental Drift (2012)',
|
'title': 'No. 2',
|
||||||
'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
|
'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
|
||||||
|
'duration': 152,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
'url': 'http://www.imdb.com/video/_/vi2524815897',
|
||||||
@ -47,21 +51,23 @@ class ImdbIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
|
||||||
'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
|
data = self._download_json(
|
||||||
video_metadata = self._parse_json(self._search_regex(
|
'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
|
||||||
r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
|
query={
|
||||||
'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
|
'key': base64.b64encode(json.dumps({
|
||||||
title = self._html_search_meta(
|
'type': 'VIDEO_PLAYER',
|
||||||
['og:title', 'twitter:title'], webpage) or self._html_search_regex(
|
'subType': 'FORCE_LEGACY',
|
||||||
r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
|
'id': 'vi%s' % video_id,
|
||||||
|
}).encode()).decode(),
|
||||||
|
})[0]
|
||||||
|
|
||||||
quality = qualities(('SD', '480p', '720p', '1080p'))
|
quality = qualities(('SD', '480p', '720p', '1080p'))
|
||||||
formats = []
|
formats = []
|
||||||
for encoding in video_metadata.get('encodings', []):
|
for encoding in data['videoLegacyEncodings']:
|
||||||
if not encoding or not isinstance(encoding, dict):
|
if not encoding or not isinstance(encoding, dict):
|
||||||
continue
|
continue
|
||||||
video_url = url_or_none(encoding.get('videoUrl'))
|
video_url = url_or_none(encoding.get('url'))
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
ext = mimetype2ext(encoding.get(
|
ext = mimetype2ext(encoding.get(
|
||||||
@ -69,7 +75,7 @@ class ImdbIE(InfoExtractor):
|
|||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
preference=1, m3u8_id='hls', fatal=False))
|
||||||
continue
|
continue
|
||||||
format_id = encoding.get('definition')
|
format_id = encoding.get('definition')
|
||||||
formats.append({
|
formats.append({
|
||||||
@ -80,13 +86,33 @@ class ImdbIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'https://www.imdb.com/video/vi' + video_id, video_id)
|
||||||
|
video_metadata = self._parse_json(self._search_regex(
|
||||||
|
r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
|
||||||
|
'video metadata'), video_id)
|
||||||
|
|
||||||
|
video_info = video_metadata.get('VIDEO_INFO')
|
||||||
|
if video_info and isinstance(video_info, dict):
|
||||||
|
info = try_get(
|
||||||
|
video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
|
||||||
|
else:
|
||||||
|
info = {}
|
||||||
|
|
||||||
|
title = self._html_search_meta(
|
||||||
|
['og:title', 'twitter:title'], webpage) or self._html_search_regex(
|
||||||
|
r'<title>(.+?)</title>', webpage, 'title',
|
||||||
|
default=None) or info['videoTitle']
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'alt_title': info.get('videoSubTitle'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': video_metadata.get('description'),
|
'description': info.get('videoDescription'),
|
||||||
'thumbnail': video_metadata.get('slate', {}).get('url'),
|
'thumbnail': url_or_none(try_get(
|
||||||
'duration': parse_duration(video_metadata.get('duration')),
|
video_metadata, lambda x: x['videoSlate']['source'])),
|
||||||
|
'duration': parse_duration(info.get('videoRuntime')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,68 +0,0 @@
|
|||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
unified_strdate,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class JpopsukiIE(InfoExtractor):
|
|
||||||
IE_NAME = 'jpopsuki.tv'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P<id>\S+)'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
|
|
||||||
'md5': '88018c0c1a9b1387940e90ec9e7e198e',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '00be659d23b0b40508169cdee4545771',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'ayumi hamasaki - evolution',
|
|
||||||
'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
|
|
||||||
'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
|
|
||||||
'uploader': 'plama_chan',
|
|
||||||
'uploader_id': '404',
|
|
||||||
'upload_date': '20121101'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
video_url = 'http://www.jpopsuki.tv' + self._html_search_regex(
|
|
||||||
r'<source src="(.*?)" type', webpage, 'video url')
|
|
||||||
|
|
||||||
video_title = self._og_search_title(webpage)
|
|
||||||
description = self._og_search_description(webpage)
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
|
||||||
uploader = self._html_search_regex(
|
|
||||||
r'<li>from: <a href="/user/view/user/(.*?)/uid/',
|
|
||||||
webpage, 'video uploader', fatal=False)
|
|
||||||
uploader_id = self._html_search_regex(
|
|
||||||
r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
|
|
||||||
webpage, 'video uploader_id', fatal=False)
|
|
||||||
upload_date = unified_strdate(self._html_search_regex(
|
|
||||||
r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
|
|
||||||
fatal=False))
|
|
||||||
view_count_str = self._html_search_regex(
|
|
||||||
r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
|
|
||||||
fatal=False)
|
|
||||||
comment_count_str = self._html_search_regex(
|
|
||||||
r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count',
|
|
||||||
fatal=False)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video_url,
|
|
||||||
'title': video_title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'uploader': uploader,
|
|
||||||
'uploader_id': uploader_id,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'view_count': int_or_none(view_count_str),
|
|
||||||
'comment_count': int_or_none(comment_count_str),
|
|
||||||
}
|
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
@ -36,7 +35,7 @@ class LecturioBaseIE(InfoExtractor):
|
|||||||
self._LOGIN_URL, None, 'Downloading login popup')
|
self._LOGIN_URL, None, 'Downloading login popup')
|
||||||
|
|
||||||
def is_logged(url_handle):
|
def is_logged(url_handle):
|
||||||
return self._LOGIN_URL not in compat_str(url_handle.geturl())
|
return self._LOGIN_URL not in url_handle.geturl()
|
||||||
|
|
||||||
# Already logged in
|
# Already logged in
|
||||||
if is_logged(urlh):
|
if is_logged(urlh):
|
||||||
|
@ -8,7 +8,6 @@ from .common import InfoExtractor
|
|||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
compat_str,
|
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -99,7 +98,7 @@ class LinuxAcademyIE(InfoExtractor):
|
|||||||
'sso': 'true',
|
'sso': 'true',
|
||||||
})
|
})
|
||||||
|
|
||||||
login_state_url = compat_str(urlh.geturl())
|
login_state_url = urlh.geturl()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
login_page = self._download_webpage(
|
login_page = self._download_webpage(
|
||||||
@ -129,7 +128,7 @@ class LinuxAcademyIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
|
|
||||||
access_token = self._search_regex(
|
access_token = self._search_regex(
|
||||||
r'access_token=([^=&]+)', compat_str(urlh.geturl()),
|
r'access_token=([^=&]+)', urlh.geturl(),
|
||||||
'access token')
|
'access token')
|
||||||
|
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
|
@ -6,7 +6,6 @@ import re
|
|||||||
from .theplatform import ThePlatformBaseIE
|
from .theplatform import ThePlatformBaseIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -114,7 +113,7 @@ class MediasetIE(ThePlatformBaseIE):
|
|||||||
continue
|
continue
|
||||||
urlh = ie._request_webpage(
|
urlh = ie._request_webpage(
|
||||||
embed_url, video_id, note='Following embed URL redirect')
|
embed_url, video_id, note='Following embed URL redirect')
|
||||||
embed_url = compat_str(urlh.geturl())
|
embed_url = urlh.geturl()
|
||||||
program_guid = _program_guid(_qs(embed_url))
|
program_guid = _program_guid(_qs(embed_url))
|
||||||
if program_guid:
|
if program_guid:
|
||||||
entries.append(embed_url)
|
entries.append(embed_url)
|
||||||
|
@ -129,7 +129,7 @@ class MediasiteIE(InfoExtractor):
|
|||||||
query = mobj.group('query')
|
query = mobj.group('query')
|
||||||
|
|
||||||
webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
|
webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
|
||||||
redirect_url = compat_str(urlh.geturl())
|
redirect_url = urlh.geturl()
|
||||||
|
|
||||||
# XXX: might have also extracted UrlReferrer and QueryString from the html
|
# XXX: might have also extracted UrlReferrer and QueryString from the html
|
||||||
service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
|
service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
|
||||||
|
@ -31,7 +31,7 @@ class NhkVodIE(InfoExtractor):
|
|||||||
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
|
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'
|
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
|
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
@ -18,7 +18,7 @@ class NovaEmbedIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
|
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
|
||||||
'md5': 'b3834f6de5401baabf31ed57456463f7',
|
'md5': 'ee009bafcc794541570edd44b71cbea3',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8o0n0r',
|
'id': '8o0n0r',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -44,11 +44,17 @@ class NovaEmbedIE(InfoExtractor):
|
|||||||
formats = []
|
formats = []
|
||||||
for format_id, format_list in bitrates.items():
|
for format_id, format_list in bitrates.items():
|
||||||
if not isinstance(format_list, list):
|
if not isinstance(format_list, list):
|
||||||
continue
|
format_list = [format_list]
|
||||||
for format_url in format_list:
|
for format_url in format_list:
|
||||||
format_url = url_or_none(format_url)
|
format_url = url_or_none(format_url)
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
|
if format_id == 'hls':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, ext='mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||||
|
fatal=False))
|
||||||
|
continue
|
||||||
f = {
|
f = {
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
}
|
}
|
||||||
@ -91,7 +97,7 @@ class NovaIE(InfoExtractor):
|
|||||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||||
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
|
'md5': '249baab7d0104e186e78b0899c7d5f28',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1757139',
|
'id': '1757139',
|
||||||
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
|
'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
|
||||||
@ -113,7 +119,8 @@ class NovaIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# rtmp download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'skip': 'gone',
|
||||||
}, {
|
}, {
|
||||||
# media.cms.nova.cz embed
|
# media.cms.nova.cz embed
|
||||||
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
|
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
|
||||||
@ -128,6 +135,7 @@ class NovaIE(InfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': [NovaEmbedIE.ie_key()],
|
'add_ie': [NovaEmbedIE.ie_key()],
|
||||||
|
'skip': 'CHYBA 404: STRÁNKA NENALEZENA',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
|
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -152,14 +160,29 @@ class NovaIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
description = clean_html(self._og_search_description(webpage, default=None))
|
||||||
|
if site == 'novaplus':
|
||||||
|
upload_date = unified_strdate(self._search_regex(
|
||||||
|
r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
|
||||||
|
elif site == 'fanda':
|
||||||
|
upload_date = unified_strdate(self._search_regex(
|
||||||
|
r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
|
||||||
|
else:
|
||||||
|
upload_date = None
|
||||||
|
|
||||||
# novaplus
|
# novaplus
|
||||||
embed_id = self._search_regex(
|
embed_id = self._search_regex(
|
||||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
||||||
webpage, 'embed url', default=None)
|
webpage, 'embed url', default=None)
|
||||||
if embed_id:
|
if embed_id:
|
||||||
return self.url_result(
|
return {
|
||||||
'https://media.cms.nova.cz/embed/%s' % embed_id,
|
'_type': 'url_transparent',
|
||||||
ie=NovaEmbedIE.ie_key(), video_id=embed_id)
|
'url': 'https://media.cms.nova.cz/embed/%s' % embed_id,
|
||||||
|
'ie_key': NovaEmbedIE.ie_key(),
|
||||||
|
'id': embed_id,
|
||||||
|
'description': description,
|
||||||
|
'upload_date': upload_date
|
||||||
|
}
|
||||||
|
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
[r"(?:media|video_id)\s*:\s*'(\d+)'",
|
[r"(?:media|video_id)\s*:\s*'(\d+)'",
|
||||||
@ -233,18 +256,8 @@ class NovaIE(InfoExtractor):
|
|||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
|
title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
|
||||||
description = clean_html(self._og_search_description(webpage, default=None))
|
|
||||||
thumbnail = config.get('poster')
|
thumbnail = config.get('poster')
|
||||||
|
|
||||||
if site == 'novaplus':
|
|
||||||
upload_date = unified_strdate(self._search_regex(
|
|
||||||
r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
|
|
||||||
elif site == 'fanda':
|
|
||||||
upload_date = unified_strdate(self._search_regex(
|
|
||||||
r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
|
|
||||||
else:
|
|
||||||
upload_date = None
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
|
@ -4,6 +4,7 @@ from .common import InfoExtractor
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -48,6 +49,10 @@ class NprIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
}],
|
}],
|
||||||
'expected_warnings': ['Failed to download m3u8 information'],
|
'expected_warnings': ['Failed to download m3u8 information'],
|
||||||
|
}, {
|
||||||
|
# multimedia, no formats, stream
|
||||||
|
'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -95,6 +100,17 @@ class NprIE(InfoExtractor):
|
|||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'quality': quality(format_id),
|
'quality': quality(format_id),
|
||||||
})
|
})
|
||||||
|
for stream_id, stream_entry in media.get('stream', {}).items():
|
||||||
|
if not isinstance(stream_entry, dict):
|
||||||
|
continue
|
||||||
|
if stream_id != 'hlsUrl':
|
||||||
|
continue
|
||||||
|
stream_url = url_or_none(stream_entry.get('$text'))
|
||||||
|
if not stream_url:
|
||||||
|
continue
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
stream_url, stream_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
|
@ -69,10 +69,10 @@ class NYTimesBaseIE(InfoExtractor):
|
|||||||
'width': int_or_none(video.get('width')),
|
'width': int_or_none(video.get('width')),
|
||||||
'height': int_or_none(video.get('height')),
|
'height': int_or_none(video.get('height')),
|
||||||
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
|
'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
|
||||||
'tbr': int_or_none(video.get('bitrate'), 1000),
|
'tbr': int_or_none(video.get('bitrate'), 1000) or None,
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats, ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id'))
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for image in video_data.get('images', []):
|
for image in video_data.get('images', []):
|
||||||
|
@ -8,6 +8,7 @@ from ..compat import compat_str
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
@ -415,6 +416,7 @@ class PeerTubeIE(InfoExtractor):
|
|||||||
peertube\.cpy\.re
|
peertube\.cpy\.re
|
||||||
)'''
|
)'''
|
||||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||||
|
_API_BASE = 'https://%s/api/v1/videos/%s/%s'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
(?:
|
(?:
|
||||||
peertube:(?P<host>[^:]+):|
|
peertube:(?P<host>[^:]+):|
|
||||||
@ -423,26 +425,30 @@ class PeerTubeIE(InfoExtractor):
|
|||||||
(?P<id>%s)
|
(?P<id>%s)
|
||||||
''' % (_INSTANCES_RE, _UUID_RE)
|
''' % (_INSTANCES_RE, _UUID_RE)
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
|
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||||
'md5': '80f24ff364cc9d333529506a263e7feb',
|
'md5': '9bed8c0137913e17b86334e5885aacff',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
|
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'wow',
|
'title': 'What is PeerTube?',
|
||||||
'description': 'wow such video, so gif',
|
'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
|
||||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||||
'timestamp': 1519297480,
|
'timestamp': 1538391166,
|
||||||
'upload_date': '20180222',
|
'upload_date': '20181001',
|
||||||
'uploader': 'Luclu7',
|
'uploader': 'Framasoft',
|
||||||
'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
|
'uploader_id': '3',
|
||||||
'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7',
|
'uploader_url': 'https://framatube.org/accounts/framasoft',
|
||||||
'license': 'Unknown',
|
'channel': 'Les vidéos de Framasoft',
|
||||||
'duration': 3,
|
'channel_id': '2',
|
||||||
|
'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
|
||||||
|
'language': 'en',
|
||||||
|
'license': 'Attribution - Share Alike',
|
||||||
|
'duration': 113,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'tags': list,
|
'tags': ['framasoft', 'peertube'],
|
||||||
'categories': list,
|
'categories': ['Science & Technology'],
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
||||||
@ -484,13 +490,38 @@ class PeerTubeIE(InfoExtractor):
|
|||||||
entries = [peertube_url]
|
entries = [peertube_url]
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
|
def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
|
||||||
|
return self._download_json(
|
||||||
|
self._API_BASE % (host, video_id, path), video_id,
|
||||||
|
note=note, errnote=errnote, fatal=fatal)
|
||||||
|
|
||||||
|
def _get_subtitles(self, host, video_id):
|
||||||
|
captions = self._call_api(
|
||||||
|
host, video_id, 'captions', note='Downloading captions JSON',
|
||||||
|
fatal=False)
|
||||||
|
if not isinstance(captions, dict):
|
||||||
|
return
|
||||||
|
data = captions.get('data')
|
||||||
|
if not isinstance(data, list):
|
||||||
|
return
|
||||||
|
subtitles = {}
|
||||||
|
for e in data:
|
||||||
|
language_id = try_get(e, lambda x: x['language']['id'], compat_str)
|
||||||
|
caption_url = urljoin('https://%s' % host, e.get('captionPath'))
|
||||||
|
if not caption_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(language_id or 'en', []).append({
|
||||||
|
'url': caption_url,
|
||||||
|
})
|
||||||
|
return subtitles
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
host = mobj.group('host') or mobj.group('host_2')
|
host = mobj.group('host') or mobj.group('host_2')
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._call_api(
|
||||||
'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
|
host, video_id, '', note='Downloading video JSON')
|
||||||
|
|
||||||
title = video['name']
|
title = video['name']
|
||||||
|
|
||||||
@ -513,10 +544,28 @@ class PeerTubeIE(InfoExtractor):
|
|||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
def account_data(field):
|
full_description = self._call_api(
|
||||||
return try_get(video, lambda x: x['account'][field], compat_str)
|
host, video_id, 'description', note='Downloading description JSON',
|
||||||
|
fatal=False)
|
||||||
|
|
||||||
category = try_get(video, lambda x: x['category']['label'], compat_str)
|
description = None
|
||||||
|
if isinstance(full_description, dict):
|
||||||
|
description = str_or_none(full_description.get('description'))
|
||||||
|
if not description:
|
||||||
|
description = video.get('description')
|
||||||
|
|
||||||
|
subtitles = self.extract_subtitles(host, video_id)
|
||||||
|
|
||||||
|
def data(section, field, type_):
|
||||||
|
return try_get(video, lambda x: x[section][field], type_)
|
||||||
|
|
||||||
|
def account_data(field, type_):
|
||||||
|
return data('account', field, type_)
|
||||||
|
|
||||||
|
def channel_data(field, type_):
|
||||||
|
return data('channel', field, type_)
|
||||||
|
|
||||||
|
category = data('category', 'label', compat_str)
|
||||||
categories = [category] if category else None
|
categories = [category] if category else None
|
||||||
|
|
||||||
nsfw = video.get('nsfw')
|
nsfw = video.get('nsfw')
|
||||||
@ -528,14 +577,17 @@ class PeerTubeIE(InfoExtractor):
|
|||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': video.get('description'),
|
'description': description,
|
||||||
'thumbnail': urljoin(url, video.get('thumbnailPath')),
|
'thumbnail': urljoin(url, video.get('thumbnailPath')),
|
||||||
'timestamp': unified_timestamp(video.get('publishedAt')),
|
'timestamp': unified_timestamp(video.get('publishedAt')),
|
||||||
'uploader': account_data('displayName'),
|
'uploader': account_data('displayName', compat_str),
|
||||||
'uploader_id': account_data('uuid'),
|
'uploader_id': str_or_none(account_data('id', int)),
|
||||||
'uploder_url': account_data('url'),
|
'uploader_url': url_or_none(account_data('url', compat_str)),
|
||||||
'license': try_get(
|
'channel': channel_data('displayName', compat_str),
|
||||||
video, lambda x: x['licence']['label'], compat_str),
|
'channel_id': str_or_none(channel_data('id', int)),
|
||||||
|
'channel_url': url_or_none(channel_data('url', compat_str)),
|
||||||
|
'language': data('language', 'id', compat_str),
|
||||||
|
'license': data('licence', 'label', compat_str),
|
||||||
'duration': int_or_none(video.get('duration')),
|
'duration': int_or_none(video.get('duration')),
|
||||||
'view_count': int_or_none(video.get('views')),
|
'view_count': int_or_none(video.get('views')),
|
||||||
'like_count': int_or_none(video.get('likes')),
|
'like_count': int_or_none(video.get('likes')),
|
||||||
@ -544,4 +596,5 @@ class PeerTubeIE(InfoExtractor):
|
|||||||
'tags': try_get(video, lambda x: x['tags'], list),
|
'tags': try_get(video, lambda x: x['tags'], list),
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles
|
||||||
}
|
}
|
||||||
|
@ -46,7 +46,7 @@ class PlatziBaseIE(InfoExtractor):
|
|||||||
headers={'Referer': self._LOGIN_URL})
|
headers={'Referer': self._LOGIN_URL})
|
||||||
|
|
||||||
# login succeeded
|
# login succeeded
|
||||||
if 'platzi.com/login' not in compat_str(urlh.geturl()):
|
if 'platzi.com/login' not in urlh.geturl():
|
||||||
return
|
return
|
||||||
|
|
||||||
login_error = self._webpage_read_content(
|
login_error = self._webpage_read_content(
|
||||||
|
99
youtube_dl/extractor/popcorntimes.py
Normal file
99
youtube_dl/extractor/popcorntimes.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_b64decode,
|
||||||
|
compat_chr,
|
||||||
|
)
|
||||||
|
from ..utils import int_or_none
|
||||||
|
|
||||||
|
|
||||||
|
class PopcorntimesIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
|
||||||
|
'md5': '93f210991ad94ba8c3485950a2453257',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'A1XCFvz',
|
||||||
|
'display_id': 'haensel-und-gretel-opera-fantasy',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Hänsel und Gretel',
|
||||||
|
'description': 'md5:1b8146791726342e7b22ce8125cf6945',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'creator': 'John Paul',
|
||||||
|
'release_date': '19541009',
|
||||||
|
'duration': 4260,
|
||||||
|
'tbr': 5380,
|
||||||
|
'width': 720,
|
||||||
|
'height': 540,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id, display_id = mobj.group('id', 'display_id')
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = self._search_regex(
|
||||||
|
r'<h1>([^<]+)', webpage, 'title',
|
||||||
|
default=None) or self._html_search_meta(
|
||||||
|
'ya:ovs:original_name', webpage, 'title', fatal=True)
|
||||||
|
|
||||||
|
loc = self._search_regex(
|
||||||
|
r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
|
||||||
|
group='value')
|
||||||
|
|
||||||
|
loc_b64 = ''
|
||||||
|
for c in loc:
|
||||||
|
c_ord = ord(c)
|
||||||
|
if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
|
||||||
|
upper = ord('Z') if c_ord <= ord('Z') else ord('z')
|
||||||
|
c_ord += 13
|
||||||
|
if upper < c_ord:
|
||||||
|
c_ord -= 26
|
||||||
|
loc_b64 += compat_chr(c_ord)
|
||||||
|
|
||||||
|
video_url = compat_b64decode(loc_b64).decode('utf-8')
|
||||||
|
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
|
||||||
|
'description', fatal=False)
|
||||||
|
|
||||||
|
thumbnail = self._search_regex(
|
||||||
|
r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'thumbnail', default=None,
|
||||||
|
group='value') or self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
|
creator = self._html_search_meta(
|
||||||
|
'video:director', webpage, 'creator', default=None)
|
||||||
|
|
||||||
|
release_date = self._html_search_meta(
|
||||||
|
'video:release_date', webpage, default=None)
|
||||||
|
if release_date:
|
||||||
|
release_date = release_date.replace('-', '')
|
||||||
|
|
||||||
|
def int_meta(name):
|
||||||
|
return int_or_none(self._html_search_meta(
|
||||||
|
name, webpage, default=None))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
'url': video_url,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'creator': creator,
|
||||||
|
'release_date': release_date,
|
||||||
|
'duration': int_meta('video:duration'),
|
||||||
|
'tbr': int_meta('ya:ovs:bitrate'),
|
||||||
|
'width': int_meta('og:video:width'),
|
||||||
|
'height': int_meta('og:video:height'),
|
||||||
|
'http_headers': {
|
||||||
|
'Referer': url,
|
||||||
|
},
|
||||||
|
}
|
@ -8,6 +8,7 @@ from ..utils import (
|
|||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
merge_dicts,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -27,23 +28,22 @@ class PornHdIE(InfoExtractor):
|
|||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
},
|
||||||
|
'skip': 'HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
# removed video
|
|
||||||
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||||
'md5': '956b8ca569f7f4d8ec563e2c41598441',
|
'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1962',
|
'id': '1962',
|
||||||
'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Sierra loves doing laundry',
|
'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759',
|
||||||
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'skip': 'Not available anymore',
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -61,7 +61,13 @@ class PornHdIE(InfoExtractor):
|
|||||||
r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
|
r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
|
||||||
webpage, 'sources', default='{}')), video_id)
|
webpage, 'sources', default='{}')), video_id)
|
||||||
|
|
||||||
|
info = {}
|
||||||
if not sources:
|
if not sources:
|
||||||
|
entries = self._parse_html5_media_entries(url, webpage, video_id)
|
||||||
|
if entries:
|
||||||
|
info = entries[0]
|
||||||
|
|
||||||
|
if not sources and not info:
|
||||||
message = self._html_search_regex(
|
message = self._html_search_regex(
|
||||||
r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
|
r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
|
||||||
webpage, 'error message', group='value')
|
webpage, 'error message', group='value')
|
||||||
@ -80,23 +86,29 @@ class PornHdIE(InfoExtractor):
|
|||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'height': height,
|
'height': height,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
if formats:
|
||||||
|
info['formats'] = formats
|
||||||
|
self._sort_formats(info['formats'])
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
|
(r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
|
||||||
webpage, 'description', fatal=False, group='value')
|
r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'),
|
||||||
|
webpage, 'description', fatal=False,
|
||||||
|
group='value') or self._html_search_meta(
|
||||||
|
'description', webpage, default=None) or self._og_search_description(webpage)
|
||||||
view_count = int_or_none(self._html_search_regex(
|
view_count = int_or_none(self._html_search_regex(
|
||||||
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
|
r'(\d+) views\s*<', webpage, 'view count', fatal=False))
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
|
||||||
'thumbnail', fatal=False, group='url')
|
'thumbnail', default=None, group='url')
|
||||||
|
|
||||||
like_count = int_or_none(self._search_regex(
|
like_count = int_or_none(self._search_regex(
|
||||||
(r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes',
|
(r'(\d+)</span>\s*likes',
|
||||||
|
r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes',
|
||||||
r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
|
r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
|
||||||
webpage, 'like count', fatal=False))
|
webpage, 'like count', fatal=False))
|
||||||
|
|
||||||
return {
|
return merge_dicts(info, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
@ -106,4 +118,4 @@ class PornHdIE(InfoExtractor):
|
|||||||
'like_count': like_count,
|
'like_count': like_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
})
|
||||||
|
@ -52,7 +52,7 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||||
(?:www\.)?thumbzilla\.com/video/
|
(?:www\.)?thumbzilla\.com/video/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z]+)
|
(?P<id>[\da-z]+)
|
||||||
@ -149,6 +149,9 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -166,6 +169,13 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
host = mobj.group('host') or 'pornhub.com'
|
host = mobj.group('host') or 'pornhub.com'
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
if 'premium' in host:
|
||||||
|
if not self._downloader.params.get('cookiefile'):
|
||||||
|
raise ExtractorError(
|
||||||
|
'PornHub Premium requires authentication.'
|
||||||
|
' You may want to use --cookies.',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
self._set_cookie(host, 'age_verified', '1')
|
self._set_cookie(host, 'age_verified', '1')
|
||||||
|
|
||||||
def dl_webpage(platform):
|
def dl_webpage(platform):
|
||||||
@ -189,10 +199,10 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||||
# on that anymore.
|
# on that anymore.
|
||||||
title = self._html_search_meta(
|
title = self._html_search_meta(
|
||||||
'twitter:title', webpage, default=None) or self._search_regex(
|
'twitter:title', webpage, default=None) or self._html_search_regex(
|
||||||
(r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
|
(r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
|
||||||
r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
|
r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||||
r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
|
r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
|
||||||
webpage, 'title', group='title')
|
webpage, 'title', group='title')
|
||||||
|
|
||||||
video_urls = []
|
video_urls = []
|
||||||
@ -405,7 +415,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||||
'playlist_mincount': 118,
|
'playlist_mincount': 118,
|
||||||
@ -473,7 +483,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -588,7 +598,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -8,7 +8,6 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_str,
|
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -39,13 +38,13 @@ class SafariBaseIE(InfoExtractor):
|
|||||||
'Downloading login page')
|
'Downloading login page')
|
||||||
|
|
||||||
def is_logged(urlh):
|
def is_logged(urlh):
|
||||||
return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
|
return 'learning.oreilly.com/home/' in urlh.geturl()
|
||||||
|
|
||||||
if is_logged(urlh):
|
if is_logged(urlh):
|
||||||
self.LOGGED_IN = True
|
self.LOGGED_IN = True
|
||||||
return
|
return
|
||||||
|
|
||||||
redirect_url = compat_str(urlh.geturl())
|
redirect_url = urlh.geturl()
|
||||||
parsed_url = compat_urlparse.urlparse(redirect_url)
|
parsed_url = compat_urlparse.urlparse(redirect_url)
|
||||||
qs = compat_parse_qs(parsed_url.query)
|
qs = compat_parse_qs(parsed_url.query)
|
||||||
next_uri = compat_urlparse.urljoin(
|
next_uri = compat_urlparse.urljoin(
|
||||||
|
@ -7,9 +7,18 @@ from .common import InfoExtractor
|
|||||||
|
|
||||||
|
|
||||||
class ServusIE(InfoExtractor):
|
class ServusIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:www\.)?
|
||||||
|
(?:
|
||||||
|
servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
|
||||||
|
servustv\.com/videos
|
||||||
|
)
|
||||||
|
/(?P<id>[aA]{2}-\w+|\d+-\d+)
|
||||||
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
|
# new URL schema
|
||||||
|
'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
|
||||||
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
|
'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'AA-1T6VBU5PW1W12',
|
'id': 'AA-1T6VBU5PW1W12',
|
||||||
@ -18,6 +27,10 @@ class ServusIE(InfoExtractor):
|
|||||||
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
'description': 'md5:1247204d85783afe3682644398ff2ec4',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# old URL schema
|
||||||
|
'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
|
'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -13,36 +13,18 @@ from ..utils import (
|
|||||||
class SportDeutschlandIE(InfoExtractor):
|
class SportDeutschlandIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
|
_VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
|
'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
|
'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
|
'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
|
||||||
'categories': ['Badminton'],
|
'categories': ['Badminton-Deutschland'],
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
|
||||||
'description': r're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV',
|
|
||||||
'timestamp': int,
|
'timestamp': int,
|
||||||
'upload_date': 're:^201408[23][0-9]$',
|
'upload_date': '20200201',
|
||||||
|
'description': 're:.*', # meaningless description for THIS video
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': 'Live stream',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'upload_date': '20140825',
|
|
||||||
'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
|
|
||||||
'timestamp': 1408976060,
|
|
||||||
'duration': 2732,
|
|
||||||
'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'view_count': int,
|
|
||||||
'categories': ['Li-Ning Badminton WM 2014'],
|
|
||||||
|
|
||||||
}
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -50,7 +32,7 @@ class SportDeutschlandIE(InfoExtractor):
|
|||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
sport_id = mobj.group('sport')
|
sport_id = mobj.group('sport')
|
||||||
|
|
||||||
api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
|
api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
|
||||||
sport_id, video_id)
|
sport_id, video_id)
|
||||||
req = sanitized_Request(api_url, headers={
|
req = sanitized_Request(api_url, headers={
|
||||||
'Accept': 'application/vnd.vidibus.v2.html+json',
|
'Accept': 'application/vnd.vidibus.v2.html+json',
|
||||||
|
@ -4,7 +4,6 @@ import re
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -58,7 +57,7 @@ class TeachableBaseIE(InfoExtractor):
|
|||||||
self._logged_in = True
|
self._logged_in = True
|
||||||
return
|
return
|
||||||
|
|
||||||
login_url = compat_str(urlh.geturl())
|
login_url = urlh.geturl()
|
||||||
|
|
||||||
login_form = self._hidden_inputs(login_page)
|
login_form = self._hidden_inputs(login_page)
|
||||||
|
|
||||||
@ -160,8 +159,8 @@ class TeachableIE(TeachableBaseIE):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
wistia_url = WistiaIE._extract_url(webpage)
|
wistia_urls = WistiaIE._extract_urls(webpage)
|
||||||
if not wistia_url:
|
if not wistia_urls:
|
||||||
if any(re.search(p, webpage) for p in (
|
if any(re.search(p, webpage) for p in (
|
||||||
r'class=["\']lecture-contents-locked',
|
r'class=["\']lecture-contents-locked',
|
||||||
r'>\s*Lecture contents locked',
|
r'>\s*Lecture contents locked',
|
||||||
@ -174,12 +173,14 @@ class TeachableIE(TeachableBaseIE):
|
|||||||
|
|
||||||
title = self._og_search_title(webpage, default=None)
|
title = self._og_search_title(webpage, default=None)
|
||||||
|
|
||||||
return {
|
entries = [{
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': wistia_url,
|
'url': wistia_url,
|
||||||
'ie_key': WistiaIE.ie_key(),
|
'ie_key': WistiaIE.ie_key(),
|
||||||
'title': title,
|
'title': title,
|
||||||
}
|
} for wistia_url in wistia_urls]
|
||||||
|
|
||||||
|
return self.playlist_result(entries, video_id, title)
|
||||||
|
|
||||||
|
|
||||||
class TeachableCourseIE(TeachableBaseIE):
|
class TeachableCourseIE(TeachableBaseIE):
|
||||||
|
@ -11,6 +11,7 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
try_get,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -24,7 +25,7 @@ class TelecincoIE(InfoExtractor):
|
|||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1876350223',
|
'id': '1876350223',
|
||||||
'title': 'Bacalao con kokotxas al pil-pil',
|
'title': 'Bacalao con kokotxas al pil-pil',
|
||||||
'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
|
'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
|
||||||
},
|
},
|
||||||
'playlist': [{
|
'playlist': [{
|
||||||
'md5': 'adb28c37238b675dad0f042292f209a7',
|
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||||
@ -55,6 +56,26 @@ class TelecincoIE(InfoExtractor):
|
|||||||
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
|
'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
|
||||||
'duration': 50,
|
'duration': 50,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# video in opening's content
|
||||||
|
'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2907195140',
|
||||||
|
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
|
||||||
|
'description': 'md5:73f340a7320143d37ab895375b2bf13a',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'md5': 'adb28c37238b675dad0f042292f209a7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'TpI2EttSDAReWpJ1o0NVh2',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
|
||||||
|
'duration': 1015,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -135,17 +156,28 @@ class TelecincoIE(InfoExtractor):
|
|||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
article = self._parse_json(self._search_regex(
|
article = self._parse_json(self._search_regex(
|
||||||
r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
|
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
|
||||||
webpage, 'article'), display_id)['article']
|
webpage, 'article'), display_id)['article']
|
||||||
title = article.get('title')
|
title = article.get('title')
|
||||||
description = clean_html(article.get('leadParagraph'))
|
description = clean_html(article.get('leadParagraph')) or ''
|
||||||
if article.get('editorialType') != 'VID':
|
if article.get('editorialType') != 'VID':
|
||||||
entries = []
|
entries = []
|
||||||
for p in article.get('body', []):
|
body = [article.get('opening')]
|
||||||
content = p.get('content')
|
body.extend(try_get(article, lambda x: x['body'], list) or [])
|
||||||
if p.get('type') != 'video' or not content:
|
for p in body:
|
||||||
|
if not isinstance(p, dict):
|
||||||
continue
|
continue
|
||||||
entries.append(self._parse_content(content, url))
|
content = p.get('content')
|
||||||
|
if not content:
|
||||||
|
continue
|
||||||
|
type_ = p.get('type')
|
||||||
|
if type_ == 'paragraph':
|
||||||
|
content_str = str_or_none(content)
|
||||||
|
if content_str:
|
||||||
|
description += content_str
|
||||||
|
continue
|
||||||
|
if type_ == 'video' and isinstance(content, dict):
|
||||||
|
entries.append(self._parse_content(content, url))
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, str_or_none(article.get('id')), title, description)
|
entries, str_or_none(article.get('id')), title, description)
|
||||||
content = article['opening']['content']
|
content = article['opening']['content']
|
||||||
|
@ -2,43 +2,42 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import try_get
|
|
||||||
|
|
||||||
|
|
||||||
class ThisOldHouseIE(InfoExtractor):
|
class ThisOldHouseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/]+/)?\d+)/(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
|
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
|
||||||
'md5': '568acf9ca25a639f0c4ff905826b662f',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2REGtUDQ',
|
'id': '5dcdddf673c3f956ef5db202',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How to Build a Storage Bench',
|
'title': 'How to Build a Storage Bench',
|
||||||
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
|
'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
|
||||||
'timestamp': 1442548800,
|
'timestamp': 1442548800,
|
||||||
'upload_date': '20150918',
|
'upload_date': '20150918',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
|
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
|
'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
(r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1',
|
r'<iframe[^>]+src=[\'"](?:https?:)?//thisoldhouse\.chorus\.build/videos/zype/([0-9a-f]{24})',
|
||||||
r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
|
webpage, 'video id')
|
||||||
webpage, 'video id', default=None, group='id')
|
return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
|
||||||
if not video_id:
|
|
||||||
drupal_settings = self._parse_json(self._search_regex(
|
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
|
||||||
webpage, 'drupal settings'), display_id)
|
|
||||||
video_id = try_get(
|
|
||||||
drupal_settings, lambda x: x['jwplatform']['video_id'],
|
|
||||||
compat_str) or list(drupal_settings['comScore'])[0]
|
|
||||||
return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
|
|
||||||
|
@ -17,9 +17,9 @@ from ..utils import (
|
|||||||
|
|
||||||
class ToggleIE(InfoExtractor):
|
class ToggleIE(InfoExtractor):
|
||||||
IE_NAME = 'toggle'
|
IE_NAME = 'toggle'
|
||||||
_VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
|
'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '343115',
|
'id': '343115',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -33,7 +33,7 @@ class ToggleIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'note': 'DRM-protected video',
|
'note': 'DRM-protected video',
|
||||||
'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413',
|
'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '341413',
|
'id': '341413',
|
||||||
'ext': 'wvm',
|
'ext': 'wvm',
|
||||||
@ -48,7 +48,7 @@ class ToggleIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
# this also tests correct video id extraction
|
# this also tests correct video id extraction
|
||||||
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
|
'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
|
||||||
'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
|
'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '332861',
|
'id': '332861',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
@ -65,19 +65,22 @@ class ToggleIE(InfoExtractor):
|
|||||||
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
|
'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367',
|
'url': 'http://www.mewatch.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
|
'url': 'http://www.mewatch.sg/zh/series/zero-calling-s2-hd/ep13/336367',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
|
'url': 'http://www.mewatch.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://video.toggle.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
|
'url': 'http://www.mewatch.sg/en/movies/seven-days/321936',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://video.toggle.sg/en/channels/eleven-plus/401585',
|
'url': 'https://www.mewatch.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.mewatch.sg/en/channels/eleven-plus/401585',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -151,7 +150,7 @@ class TumblrIE(InfoExtractor):
|
|||||||
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
|
url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
|
||||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||||
|
|
||||||
redirect_url = compat_str(urlh.geturl())
|
redirect_url = urlh.geturl()
|
||||||
if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
|
if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'This Tumblr may contain sensitive media. '
|
'This Tumblr may contain sensitive media. '
|
||||||
|
@ -106,7 +106,7 @@ class TV2DKBornholmPlayIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
|
'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
|
||||||
data=json.dumps({
|
data=json.dumps({
|
||||||
'playlist_id': video_id,
|
'playlist_id': video_id,
|
||||||
'serienavn': '',
|
'serienavn': '',
|
||||||
|
@ -3,31 +3,51 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
|
||||||
determine_ext,
|
determine_ext,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
get_element_by_class,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class TV5MondePlusIE(InfoExtractor):
|
class TV5MondePlusIE(InfoExtractor):
|
||||||
IE_DESC = 'TV5MONDE+'
|
IE_DESC = 'TV5MONDE+'
|
||||||
_VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
|
# movie
|
||||||
'md5': '12130fc199f020673138a83466542ec6',
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/rendez-vous-a-atlit',
|
||||||
|
'md5': '8cbde5ea7b296cf635073e27895e227f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
|
'id': '822a4756-0712-7329-1859-a13ac7fd1407',
|
||||||
|
'display_id': 'rendez-vous-a-atlit',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Tdah, mon amour - Enfants',
|
'title': 'Rendez-vous à Atlit',
|
||||||
'description': 'md5:230e3aca23115afcf8006d1bece6df74',
|
'description': 'md5:2893a4c5e1dbac3eedff2d87956e4efb',
|
||||||
'upload_date': '20170401',
|
'upload_date': '20200130',
|
||||||
'timestamp': 1491022860,
|
},
|
||||||
}
|
}, {
|
||||||
}
|
# series episode
|
||||||
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/c-est-la-vie-ennemie-juree',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0df7007c-4900-3936-c601-87a13a93a068',
|
||||||
|
'display_id': 'c-est-la-vie-ennemie-juree',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "C'est la vie - Ennemie jurée",
|
||||||
|
'description': 'md5:dfb5c63087b6f35fe0cc0af4fe44287e',
|
||||||
|
'upload_date': '20200130',
|
||||||
|
'series': "C'est la vie",
|
||||||
|
'episode': 'Ennemie jurée',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -37,11 +57,7 @@ class TV5MondePlusIE(InfoExtractor):
|
|||||||
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
|
if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
|
||||||
self.raise_geo_restricted(countries=['FR'])
|
self.raise_geo_restricted(countries=['FR'])
|
||||||
|
|
||||||
series = get_element_by_class('video-detail__title', webpage)
|
title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
|
||||||
title = episode = get_element_by_class(
|
|
||||||
'video-detail__subtitle', webpage) or series
|
|
||||||
if series and series != title:
|
|
||||||
title = '%s - %s' % (series, title)
|
|
||||||
vpl_data = extract_attributes(self._search_regex(
|
vpl_data = extract_attributes(self._search_regex(
|
||||||
r'(<[^>]+class="video_player_loader"[^>]+>)',
|
r'(<[^>]+class="video_player_loader"[^>]+>)',
|
||||||
webpage, 'video player loader'))
|
webpage, 'video player loader'))
|
||||||
@ -65,15 +81,37 @@ class TV5MondePlusIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
description = self._html_search_regex(
|
||||||
|
r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
|
||||||
|
'description', fatal=False)
|
||||||
|
|
||||||
|
series = self._html_search_regex(
|
||||||
|
r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
|
||||||
|
'series', default=None)
|
||||||
|
|
||||||
|
if series and series != title:
|
||||||
|
title = '%s - %s' % (series, title)
|
||||||
|
|
||||||
|
upload_date = self._search_regex(
|
||||||
|
r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
|
||||||
|
webpage, 'upload date', default=None)
|
||||||
|
if upload_date:
|
||||||
|
upload_date = upload_date.replace('_', '')
|
||||||
|
|
||||||
|
video_id = self._search_regex(
|
||||||
|
(r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
|
||||||
|
r'id_contenu["\']\s:\s*(\d+)'), webpage, 'video id',
|
||||||
|
default=display_id)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': display_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': clean_html(get_element_by_class('video-detail__description', webpage)),
|
'description': description,
|
||||||
'thumbnail': vpl_data.get('data-image'),
|
'thumbnail': vpl_data.get('data-image'),
|
||||||
'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
|
'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
|
||||||
'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)),
|
'upload_date': upload_date,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'episode': episode,
|
|
||||||
'series': series,
|
'series': series,
|
||||||
|
'episode': episode,
|
||||||
}
|
}
|
||||||
|
@ -9,8 +9,8 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class TVAIE(InfoExtractor):
|
class TVAIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
|
_VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://videos.tva.ca/details/_5596811470001',
|
'url': 'https://videos.tva.ca/details/_5596811470001',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5596811470001',
|
'id': '5596811470001',
|
||||||
@ -24,7 +24,10 @@ class TVAIE(InfoExtractor):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://video.tva.ca/details/_5596811470001',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -17,7 +17,7 @@ class TwentyFourVideoIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?P<host>
|
(?P<host>
|
||||||
(?:(?:www|porno)\.)?24video\.
|
(?:(?:www|porno?)\.)?24video\.
|
||||||
(?:net|me|xxx|sexy?|tube|adult|site|vip)
|
(?:net|me|xxx|sexy?|tube|adult|site|vip)
|
||||||
)/
|
)/
|
||||||
(?:
|
(?:
|
||||||
@ -62,6 +62,9 @@ class TwentyFourVideoIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.24video.vip/video/view/1044982',
|
'url': 'https://www.24video.vip/video/view/1044982',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://porn.24video.net/video/2640421-vsya-takay',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -575,8 +575,8 @@ class TwitchStreamIE(TwitchBaseIE):
|
|||||||
channel_id = self._match_id(url)
|
channel_id = self._match_id(url)
|
||||||
|
|
||||||
stream = self._call_api(
|
stream = self._call_api(
|
||||||
'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
|
'kraken/streams/%s?stream_type=all' % channel_id.lower(),
|
||||||
'Downloading stream JSON').get('stream')
|
channel_id, 'Downloading stream JSON').get('stream')
|
||||||
|
|
||||||
if not stream:
|
if not stream:
|
||||||
raise ExtractorError('%s is offline' % channel_id, expected=True)
|
raise ExtractorError('%s is offline' % channel_id, expected=True)
|
||||||
|
@ -1,28 +1,62 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
clean_html,
|
|
||||||
determine_ext,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_duration,
|
|
||||||
try_get,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ViewLiftBaseIE(InfoExtractor):
|
class ViewLiftBaseIE(InfoExtractor):
|
||||||
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
|
_API_BASE = 'https://prod-api.viewlift.com/'
|
||||||
|
_DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
|
||||||
|
_SITE_MAP = {
|
||||||
|
'ftfnext': 'lax',
|
||||||
|
'funnyforfree': 'snagfilms',
|
||||||
|
'hoichoi': 'hoichoitv',
|
||||||
|
'kiddovid': 'snagfilms',
|
||||||
|
'laxsportsnetwork': 'lax',
|
||||||
|
'legapallacanestro': 'lnp',
|
||||||
|
'marquee': 'marquee-tv',
|
||||||
|
'monumentalsportsnetwork': 'monumental-network',
|
||||||
|
'moviespree': 'bingeflix',
|
||||||
|
'pflmma': 'pfl',
|
||||||
|
'snagxtreme': 'snagfilms',
|
||||||
|
'theidentitytb': 'tampabay',
|
||||||
|
'vayafilm': 'snagfilms',
|
||||||
|
}
|
||||||
|
_TOKENS = {}
|
||||||
|
|
||||||
|
def _call_api(self, site, path, video_id, query):
|
||||||
|
token = self._TOKENS.get(site)
|
||||||
|
if not token:
|
||||||
|
token_query = {'site': site}
|
||||||
|
email, password = self._get_login_info(netrc_machine=site)
|
||||||
|
if email:
|
||||||
|
resp = self._download_json(
|
||||||
|
self._API_BASE + 'identity/signin', video_id,
|
||||||
|
'Logging in', query=token_query, data=json.dumps({
|
||||||
|
'email': email,
|
||||||
|
'password': password,
|
||||||
|
}).encode())
|
||||||
|
else:
|
||||||
|
resp = self._download_json(
|
||||||
|
self._API_BASE + 'identity/anonymous-token', video_id,
|
||||||
|
'Downloading authorization token', query=token_query)
|
||||||
|
self._TOKENS[site] = token = resp['authorizationToken']
|
||||||
|
return self._download_json(
|
||||||
|
self._API_BASE + path, video_id,
|
||||||
|
headers={'Authorization': token}, query=query)
|
||||||
|
|
||||||
|
|
||||||
class ViewLiftEmbedIE(ViewLiftBaseIE):
|
class ViewLiftEmbedIE(ViewLiftBaseIE):
|
||||||
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
|
IE_NAME = 'viewlift:embed'
|
||||||
|
_VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
|
'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
|
||||||
'md5': '2924e9215c6eff7a55ed35b72276bd93',
|
'md5': '2924e9215c6eff7a55ed35b72276bd93',
|
||||||
@ -30,6 +64,9 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
|
|||||||
'id': '74849a00-85a9-11e1-9660-123139220831',
|
'id': '74849a00-85a9-11e1-9660-123139220831',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '#whilewewatch',
|
'title': '#whilewewatch',
|
||||||
|
'description': 'md5:b542bef32a6f657dadd0df06e26fb0c8',
|
||||||
|
'timestamp': 1334350096,
|
||||||
|
'upload_date': '20120413',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# invalid labels, 360p is better that 480p
|
# invalid labels, 360p is better that 480p
|
||||||
@ -39,7 +76,8 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
|
|||||||
'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
|
'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Life in Limbo',
|
'title': 'Life in Limbo',
|
||||||
}
|
},
|
||||||
|
'skip': 'The video does not exist',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
|
'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -54,67 +92,68 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
|
|||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
domain, film_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
site = domain.split('.')[-2]
|
||||||
webpage = self._download_webpage(url, video_id)
|
if site in self._SITE_MAP:
|
||||||
|
site = self._SITE_MAP[site]
|
||||||
if '>This film is not playable in your area.<' in webpage:
|
try:
|
||||||
raise ExtractorError(
|
content_data = self._call_api(
|
||||||
'Film %s is not playable in your area.' % video_id, expected=True)
|
site, 'entitlement/video/status', film_id, {
|
||||||
|
'id': film_id
|
||||||
|
})['video']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage')
|
||||||
|
if error_message == 'User does not have a valid subscription or has not purchased this content.':
|
||||||
|
self.raise_login_required()
|
||||||
|
raise ExtractorError(error_message, expected=True)
|
||||||
|
raise
|
||||||
|
gist = content_data['gist']
|
||||||
|
title = gist['title']
|
||||||
|
video_assets = content_data['streamingInfo']['videoAssets']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
has_bitrate = False
|
mpeg_video_assets = video_assets.get('mpeg') or []
|
||||||
sources = self._parse_json(self._search_regex(
|
for video_asset in mpeg_video_assets:
|
||||||
r'(?s)sources:\s*(\[.+?\]),', webpage,
|
video_asset_url = video_asset.get('url')
|
||||||
'sources', default='[]'), video_id, js_to_json)
|
if not video_asset:
|
||||||
for source in sources:
|
|
||||||
file_ = source.get('file')
|
|
||||||
if not file_:
|
|
||||||
continue
|
continue
|
||||||
type_ = source.get('type')
|
bitrate = int_or_none(video_asset.get('bitrate'))
|
||||||
ext = determine_ext(file_)
|
height = int_or_none(self._search_regex(
|
||||||
format_id = source.get('label') or ext
|
r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
|
||||||
if all(v in ('m3u8', 'hls') for v in (type_, ext)):
|
'height', default=None))
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.append({
|
||||||
file_, video_id, 'mp4', 'm3u8_native',
|
'url': video_asset_url,
|
||||||
m3u8_id='hls', fatal=False))
|
'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
|
||||||
else:
|
'tbr': bitrate,
|
||||||
bitrate = int_or_none(self._search_regex(
|
'height': height,
|
||||||
[r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
|
'vcodec': video_asset.get('codec'),
|
||||||
file_, 'bitrate', default=None))
|
})
|
||||||
if not has_bitrate and bitrate:
|
|
||||||
has_bitrate = True
|
|
||||||
height = int_or_none(self._search_regex(
|
|
||||||
r'^(\d+)[pP]$', format_id, 'height', default=None))
|
|
||||||
formats.append({
|
|
||||||
'url': file_,
|
|
||||||
'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
|
|
||||||
'tbr': bitrate,
|
|
||||||
'height': height,
|
|
||||||
})
|
|
||||||
if not formats:
|
|
||||||
hls_url = self._parse_json(self._search_regex(
|
|
||||||
r'filmInfo\.src\s*=\s*({.+?});',
|
|
||||||
webpage, 'src'), video_id, js_to_json)['src']
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
hls_url, video_id, 'mp4', 'm3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False)
|
|
||||||
field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
|
|
||||||
self._sort_formats(formats, field_preference)
|
|
||||||
|
|
||||||
title = self._search_regex(
|
hls_url = video_assets.get('hls')
|
||||||
[r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
|
if hls_url:
|
||||||
webpage, 'title')
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
self._sort_formats(formats, ('height', 'tbr', 'format_id'))
|
||||||
|
|
||||||
return {
|
info = {
|
||||||
'id': video_id,
|
'id': film_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'description': gist.get('description'),
|
||||||
|
'thumbnail': gist.get('videoImageUrl'),
|
||||||
|
'duration': int_or_none(gist.get('runtime')),
|
||||||
|
'age_limit': parse_age_limit(content_data.get('parentalRating')),
|
||||||
|
'timestamp': int_or_none(gist.get('publishDate'), 1000),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
for k in ('categories', 'tags'):
|
||||||
|
info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class ViewLiftIE(ViewLiftBaseIE):
|
class ViewLiftIE(ViewLiftBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?:/(?:films/title|show|(?:news/)?videos?))?/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
|
IE_NAME = 'viewlift'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
|
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
|
||||||
'md5': '19844f897b35af219773fd63bdec2942',
|
'md5': '19844f897b35af219773fd63bdec2942',
|
||||||
@ -151,10 +190,13 @@ class ViewLiftIE(ViewLiftBaseIE):
|
|||||||
'id': '00000148-7b53-de26-a9fb-fbf306f70020',
|
'id': '00000148-7b53-de26-a9fb-fbf306f70020',
|
||||||
'display_id': 'augie_alone/s_2_ep_12_love',
|
'display_id': 'augie_alone/s_2_ep_12_love',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Augie, Alone:S. 2 Ep. 12 - Love',
|
'title': 'S. 2 Ep. 12 - Love',
|
||||||
'description': 'md5:db2a5c72d994f16a780c1eb353a8f403',
|
'description': 'Augie finds love.',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'duration': 107,
|
'duration': 107,
|
||||||
|
'upload_date': '20141012',
|
||||||
|
'timestamp': 1413129540,
|
||||||
|
'age_limit': 17,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
@ -177,6 +219,9 @@ class ViewLiftIE(ViewLiftBaseIE):
|
|||||||
# Was once Kaltura embed
|
# Was once Kaltura embed
|
||||||
'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
|
'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -184,119 +229,22 @@ class ViewLiftIE(ViewLiftBaseIE):
|
|||||||
return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
|
return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, display_id = re.match(self._VALID_URL, url).groups()
|
domain, path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
site = domain.split('.')[-2]
|
||||||
webpage = self._download_webpage(url, display_id)
|
if site in self._SITE_MAP:
|
||||||
|
site = self._SITE_MAP[site]
|
||||||
if ">Sorry, the Film you're looking for is not available.<" in webpage:
|
modules = self._call_api(
|
||||||
raise ExtractorError(
|
site, 'content/pages', display_id, {
|
||||||
'Film %s is not available.' % display_id, expected=True)
|
'includeContent': 'true',
|
||||||
|
'moduleOffset': 1,
|
||||||
initial_store_state = self._search_regex(
|
'path': path,
|
||||||
r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
|
'site': site,
|
||||||
webpage, 'Initial Store State', default=None)
|
})['modules']
|
||||||
if initial_store_state:
|
film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
|
||||||
modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
|
return {
|
||||||
initial_store_state).decode()), display_id)['page']['data']['modules']
|
'_type': 'url_transparent',
|
||||||
content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
|
'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
|
||||||
gist = content_data['gist']
|
'id': film_id,
|
||||||
film_id = gist['id']
|
'display_id': display_id,
|
||||||
title = gist['title']
|
'ie_key': 'ViewLiftEmbed',
|
||||||
video_assets = try_get(
|
}
|
||||||
content_data, lambda x: x['streamingInfo']['videoAssets'], dict)
|
|
||||||
if not video_assets:
|
|
||||||
token = self._download_json(
|
|
||||||
'https://prod-api.viewlift.com/identity/anonymous-token',
|
|
||||||
film_id, 'Downloading authorization token',
|
|
||||||
query={'site': 'snagfilms'})['authorizationToken']
|
|
||||||
video_assets = self._download_json(
|
|
||||||
'https://prod-api.viewlift.com/entitlement/video/status',
|
|
||||||
film_id, headers={
|
|
||||||
'Authorization': token,
|
|
||||||
'Referer': url,
|
|
||||||
}, query={
|
|
||||||
'id': film_id
|
|
||||||
})['video']['streamingInfo']['videoAssets']
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
mpeg_video_assets = video_assets.get('mpeg') or []
|
|
||||||
for video_asset in mpeg_video_assets:
|
|
||||||
video_asset_url = video_asset.get('url')
|
|
||||||
if not video_asset:
|
|
||||||
continue
|
|
||||||
bitrate = int_or_none(video_asset.get('bitrate'))
|
|
||||||
height = int_or_none(self._search_regex(
|
|
||||||
r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
|
|
||||||
'height', default=None))
|
|
||||||
formats.append({
|
|
||||||
'url': video_asset_url,
|
|
||||||
'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
|
|
||||||
'tbr': bitrate,
|
|
||||||
'height': height,
|
|
||||||
'vcodec': video_asset.get('codec'),
|
|
||||||
})
|
|
||||||
|
|
||||||
hls_url = video_assets.get('hls')
|
|
||||||
if hls_url:
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
|
|
||||||
self._sort_formats(formats, ('height', 'tbr', 'format_id'))
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': film_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': gist.get('description'),
|
|
||||||
'thumbnail': gist.get('videoImageUrl'),
|
|
||||||
'duration': int_or_none(gist.get('runtime')),
|
|
||||||
'age_limit': parse_age_limit(content_data.get('parentalRating')),
|
|
||||||
'timestamp': int_or_none(gist.get('publishDate'), 1000),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
for k in ('categories', 'tags'):
|
|
||||||
info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
|
|
||||||
return info
|
|
||||||
else:
|
|
||||||
film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
|
|
||||||
|
|
||||||
snag = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag', default='[]'),
|
|
||||||
display_id)
|
|
||||||
|
|
||||||
for item in snag:
|
|
||||||
if item.get('data', {}).get('film', {}).get('id') == film_id:
|
|
||||||
data = item['data']['film']
|
|
||||||
title = data['title']
|
|
||||||
description = clean_html(data.get('synopsis'))
|
|
||||||
thumbnail = data.get('image')
|
|
||||||
duration = int_or_none(data.get('duration') or data.get('runtime'))
|
|
||||||
categories = [
|
|
||||||
category['title'] for category in data.get('categories', [])
|
|
||||||
if category.get('title')]
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
title = self._html_search_regex(
|
|
||||||
(r'itemprop="title">([^<]+)<',
|
|
||||||
r'(?s)itemprop="title">(.+?)<div'), webpage, 'title')
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
|
|
||||||
webpage, 'description', default=None) or self._og_search_description(webpage)
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
|
||||||
duration = parse_duration(self._search_regex(
|
|
||||||
r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
|
|
||||||
webpage, 'duration', fatal=False))
|
|
||||||
categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
|
|
||||||
'id': film_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'categories': categories,
|
|
||||||
'ie_key': 'ViewLiftEmbed',
|
|
||||||
}
|
|
||||||
|
@ -33,6 +33,7 @@ from ..utils import (
|
|||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -191,7 +192,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
|||||||
for tt in text_tracks:
|
for tt in text_tracks:
|
||||||
subtitles[tt['lang']] = [{
|
subtitles[tt['lang']] = [{
|
||||||
'ext': 'vtt',
|
'ext': 'vtt',
|
||||||
'url': 'https://vimeo.com' + tt['url'],
|
'url': urljoin('https://vimeo.com', tt['url']),
|
||||||
}]
|
}]
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
@ -584,14 +585,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||||||
url = 'https://vimeo.com/' + video_id
|
url = 'https://vimeo.com/' + video_id
|
||||||
elif is_player:
|
elif is_player:
|
||||||
url = 'https://player.vimeo.com/video/' + video_id
|
url = 'https://player.vimeo.com/video/' + video_id
|
||||||
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
|
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf', '/album/', '/showcase/')):
|
||||||
url = 'https://vimeo.com/' + video_id
|
url = 'https://vimeo.com/' + video_id
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Retrieve video webpage to extract further information
|
# Retrieve video webpage to extract further information
|
||||||
webpage, urlh = self._download_webpage_handle(
|
webpage, urlh = self._download_webpage_handle(
|
||||||
url, video_id, headers=headers)
|
url, video_id, headers=headers)
|
||||||
redirect_url = compat_str(urlh.geturl())
|
redirect_url = urlh.geturl()
|
||||||
except ExtractorError as ee:
|
except ExtractorError as ee:
|
||||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
|
||||||
errmsg = ee.cause.read()
|
errmsg = ee.cause.read()
|
||||||
@ -841,33 +842,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
|||||||
return self._TITLE or self._html_search_regex(
|
return self._TITLE or self._html_search_regex(
|
||||||
self._TITLE_RE, webpage, 'list title', fatal=False)
|
self._TITLE_RE, webpage, 'list title', fatal=False)
|
||||||
|
|
||||||
def _login_list_password(self, page_url, list_id, webpage):
|
|
||||||
login_form = self._search_regex(
|
|
||||||
r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
|
|
||||||
webpage, 'login form', default=None)
|
|
||||||
if not login_form:
|
|
||||||
return webpage
|
|
||||||
|
|
||||||
password = self._downloader.params.get('videopassword')
|
|
||||||
if password is None:
|
|
||||||
raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
|
|
||||||
fields = self._hidden_inputs(login_form)
|
|
||||||
token, vuid = self._extract_xsrft_and_vuid(webpage)
|
|
||||||
fields['token'] = token
|
|
||||||
fields['password'] = password
|
|
||||||
post = urlencode_postdata(fields)
|
|
||||||
password_path = self._search_regex(
|
|
||||||
r'action="([^"]+)"', login_form, 'password URL')
|
|
||||||
password_url = compat_urlparse.urljoin(page_url, password_path)
|
|
||||||
password_request = sanitized_Request(password_url, post)
|
|
||||||
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
|
|
||||||
self._set_vimeo_cookie('vuid', vuid)
|
|
||||||
self._set_vimeo_cookie('xsrft', token)
|
|
||||||
|
|
||||||
return self._download_webpage(
|
|
||||||
password_request, list_id,
|
|
||||||
'Verifying the password', 'Wrong password')
|
|
||||||
|
|
||||||
def _title_and_entries(self, list_id, base_url):
|
def _title_and_entries(self, list_id, base_url):
|
||||||
for pagenum in itertools.count(1):
|
for pagenum in itertools.count(1):
|
||||||
page_url = self._page_url(base_url, pagenum)
|
page_url = self._page_url(base_url, pagenum)
|
||||||
@ -876,7 +850,6 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
|
|||||||
'Downloading page %s' % pagenum)
|
'Downloading page %s' % pagenum)
|
||||||
|
|
||||||
if pagenum == 1:
|
if pagenum == 1:
|
||||||
webpage = self._login_list_password(page_url, list_id, webpage)
|
|
||||||
yield self._extract_list_title(webpage)
|
yield self._extract_list_title(webpage)
|
||||||
|
|
||||||
# Try extracting href first since not all videos are available via
|
# Try extracting href first since not all videos are available via
|
||||||
@ -923,7 +896,7 @@ class VimeoUserIE(VimeoChannelIE):
|
|||||||
_BASE_URL_TEMPL = 'https://vimeo.com/%s'
|
_BASE_URL_TEMPL = 'https://vimeo.com/%s'
|
||||||
|
|
||||||
|
|
||||||
class VimeoAlbumIE(VimeoChannelIE):
|
class VimeoAlbumIE(VimeoBaseInfoExtractor):
|
||||||
IE_NAME = 'vimeo:album'
|
IE_NAME = 'vimeo:album'
|
||||||
_VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
_VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
|
||||||
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
|
||||||
@ -973,13 +946,39 @@ class VimeoAlbumIE(VimeoChannelIE):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
album_id = self._match_id(url)
|
album_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, album_id)
|
webpage = self._download_webpage(url, album_id)
|
||||||
webpage = self._login_list_password(url, album_id, webpage)
|
viewer = self._parse_json(self._search_regex(
|
||||||
api_config = self._extract_vimeo_config(webpage, album_id)['api']
|
r'bootstrap_data\s*=\s*({.+?})</script>',
|
||||||
|
webpage, 'bootstrap data'), album_id)['viewer']
|
||||||
|
jwt = viewer['jwt']
|
||||||
|
album = self._download_json(
|
||||||
|
'https://api.vimeo.com/albums/' + album_id,
|
||||||
|
album_id, headers={'Authorization': 'jwt ' + jwt},
|
||||||
|
query={'fields': 'description,name,privacy'})
|
||||||
|
hashed_pass = None
|
||||||
|
if try_get(album, lambda x: x['privacy']['view']) == 'password':
|
||||||
|
password = self._downloader.params.get('videopassword')
|
||||||
|
if not password:
|
||||||
|
raise ExtractorError(
|
||||||
|
'This album is protected by a password, use the --video-password option',
|
||||||
|
expected=True)
|
||||||
|
self._set_vimeo_cookie('vuid', viewer['vuid'])
|
||||||
|
try:
|
||||||
|
hashed_pass = self._download_json(
|
||||||
|
'https://vimeo.com/showcase/%s/auth' % album_id,
|
||||||
|
album_id, 'Verifying the password', data=urlencode_postdata({
|
||||||
|
'password': password,
|
||||||
|
'token': viewer['xsrft'],
|
||||||
|
}), headers={
|
||||||
|
'X-Requested-With': 'XMLHttpRequest',
|
||||||
|
})['hashed_pass']
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
|
raise ExtractorError('Wrong password', expected=True)
|
||||||
|
raise
|
||||||
entries = OnDemandPagedList(functools.partial(
|
entries = OnDemandPagedList(functools.partial(
|
||||||
self._fetch_page, album_id, api_config['jwt'],
|
self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE)
|
||||||
api_config.get('hashed_pass')), self._PAGE_SIZE)
|
return self.playlist_result(
|
||||||
return self.playlist_result(entries, album_id, self._html_search_regex(
|
entries, album_id, album.get('name'), album.get('description'))
|
||||||
r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))
|
|
||||||
|
|
||||||
|
|
||||||
class VimeoGroupsIE(VimeoChannelIE):
|
class VimeoGroupsIE(VimeoChannelIE):
|
||||||
|
@ -45,22 +45,23 @@ class WistiaIE(InfoExtractor):
|
|||||||
# https://wistia.com/support/embed-and-share/video-on-your-website
|
# https://wistia.com/support/embed-and-share/video-on-your-website
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
match = re.search(
|
urls = WistiaIE._extract_urls(webpage)
|
||||||
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage)
|
return urls[0] if urls else None
|
||||||
if match:
|
|
||||||
return unescapeHTML(match.group('url'))
|
|
||||||
|
|
||||||
match = re.search(
|
@staticmethod
|
||||||
r'''(?sx)
|
def _extract_urls(webpage):
|
||||||
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
|
urls = []
|
||||||
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
|
for match in re.finditer(
|
||||||
''', webpage)
|
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
|
||||||
if match:
|
urls.append(unescapeHTML(match.group('url')))
|
||||||
return 'wistia:%s' % match.group('id')
|
for match in re.finditer(
|
||||||
|
r'''(?sx)
|
||||||
match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage)
|
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2
|
||||||
if match:
|
''', webpage):
|
||||||
return 'wistia:%s' % match.group('id')
|
urls.append('wistia:%s' % match.group('id'))
|
||||||
|
for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
|
||||||
|
urls.append('wistia:%s' % match.group('id'))
|
||||||
|
return urls
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -113,7 +113,7 @@ class XHamsterIE(InfoExtractor):
|
|||||||
display_id = mobj.group('display_id') or mobj.group('display_id_2')
|
display_id = mobj.group('display_id') or mobj.group('display_id_2')
|
||||||
|
|
||||||
desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
|
desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
|
||||||
webpage = self._download_webpage(desktop_url, video_id)
|
webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
|
||||||
|
|
||||||
error = self._html_search_regex(
|
error = self._html_search_regex(
|
||||||
r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
|
r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
|
||||||
@ -161,6 +161,9 @@ class XHamsterIE(InfoExtractor):
|
|||||||
'ext': determine_ext(format_url, 'mp4'),
|
'ext': determine_ext(format_url, 'mp4'),
|
||||||
'height': get_height(quality),
|
'height': get_height(quality),
|
||||||
'filesize': filesize,
|
'filesize': filesize,
|
||||||
|
'http_headers': {
|
||||||
|
'Referer': urlh.geturl(),
|
||||||
|
},
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ class XTubeIE(InfoExtractor):
|
|||||||
'display_id': 'A-Super-Run-Part-1-YT',
|
'display_id': 'A-Super-Run-Part-1-YT',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'A Super Run - Part 1 (YT)',
|
'title': 'A Super Run - Part 1 (YT)',
|
||||||
'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
|
'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
|
||||||
'uploader': 'tshirtguy59',
|
'uploader': 'tshirtguy59',
|
||||||
'duration': 579,
|
'duration': 579,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
@ -87,10 +87,24 @@ class XTubeIE(InfoExtractor):
|
|||||||
'Cookie': 'age_verified=1; cookiesAccepted=1',
|
'Cookie': 'age_verified=1; cookiesAccepted=1',
|
||||||
})
|
})
|
||||||
|
|
||||||
sources = self._parse_json(self._search_regex(
|
title, thumbnail, duration = [None] * 3
|
||||||
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
|
|
||||||
webpage, 'sources', group='sources'), video_id,
|
config = self._parse_json(self._search_regex(
|
||||||
transform_source=js_to_json)
|
r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
|
||||||
|
default='{}'), video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
if config:
|
||||||
|
config = config.get('mainRoll')
|
||||||
|
if isinstance(config, dict):
|
||||||
|
title = config.get('title')
|
||||||
|
thumbnail = config.get('poster')
|
||||||
|
duration = int_or_none(config.get('duration'))
|
||||||
|
sources = config.get('sources')
|
||||||
|
|
||||||
|
if isinstance(sources, dict):
|
||||||
|
sources = self._parse_json(self._search_regex(
|
||||||
|
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
|
||||||
|
webpage, 'sources', group='sources'), video_id,
|
||||||
|
transform_source=js_to_json)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in sources.items():
|
for format_id, format_url in sources.items():
|
||||||
@ -102,20 +116,25 @@ class XTubeIE(InfoExtractor):
|
|||||||
self._remove_duplicate_formats(formats)
|
self._remove_duplicate_formats(formats)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = self._search_regex(
|
if not title:
|
||||||
(r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
|
title = self._search_regex(
|
||||||
webpage, 'title', group='title')
|
(r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
|
||||||
description = self._search_regex(
|
webpage, 'title', group='title')
|
||||||
|
description = self._og_search_description(
|
||||||
|
webpage, default=None) or self._html_search_meta(
|
||||||
|
'twitter:description', webpage, default=None) or self._search_regex(
|
||||||
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
|
r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
|
||||||
uploader = self._search_regex(
|
uploader = self._search_regex(
|
||||||
(r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
|
(r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
|
||||||
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
|
r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
|
||||||
webpage, 'uploader', fatal=False)
|
webpage, 'uploader', fatal=False)
|
||||||
duration = parse_duration(self._search_regex(
|
if not duration:
|
||||||
r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
|
duration = parse_duration(self._search_regex(
|
||||||
webpage, 'duration', fatal=False))
|
r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
|
||||||
|
webpage, 'duration', fatal=False))
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
|
(r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
|
||||||
|
r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
comment_count = str_to_int(self._html_search_regex(
|
comment_count = str_to_int(self._html_search_regex(
|
||||||
r'>Comments? \(([\d,\.]+)\)<',
|
r'>Comments? \(([\d,\.]+)\)<',
|
||||||
@ -126,6 +145,7 @@ class XTubeIE(InfoExtractor):
|
|||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
@ -144,7 +164,7 @@ class XTubeUserIE(InfoExtractor):
|
|||||||
'id': 'greenshowers-4056496',
|
'id': 'greenshowers-4056496',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
'playlist_mincount': 155,
|
'playlist_mincount': 154,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -44,7 +44,7 @@ class YouJizzIE(InfoExtractor):
|
|||||||
|
|
||||||
encodings = self._parse_json(
|
encodings = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
|
r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
|
||||||
default='[]'),
|
default='[]'),
|
||||||
video_id, fatal=False)
|
video_id, fatal=False)
|
||||||
for encoding in encodings:
|
for encoding in encodings:
|
||||||
|
@ -29,7 +29,6 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
bool_or_none,
|
bool_or_none,
|
||||||
clean_html,
|
clean_html,
|
||||||
dict_get,
|
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -570,7 +569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'upload_date': '20120506',
|
'upload_date': '20120506',
|
||||||
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
||||||
'alt_title': 'I Love It (feat. Charli XCX)',
|
'alt_title': 'I Love It (feat. Charli XCX)',
|
||||||
'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
|
'description': 'md5:19a2f98d9032b9311e686ed039564f63',
|
||||||
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
|
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
|
||||||
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
|
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
|
||||||
'iconic ep', 'iconic', 'love', 'it'],
|
'iconic ep', 'iconic', 'love', 'it'],
|
||||||
@ -685,12 +684,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': 'nfWlot6h_JM',
|
'id': 'nfWlot6h_JM',
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Taylor Swift - Shake It Off',
|
'title': 'Taylor Swift - Shake It Off',
|
||||||
'description': 'md5:bec2185232c05479482cb5a9b82719bf',
|
'description': 'md5:307195cd21ff7fa352270fe884570ef0',
|
||||||
'duration': 242,
|
'duration': 242,
|
||||||
'uploader': 'TaylorSwiftVEVO',
|
'uploader': 'TaylorSwiftVEVO',
|
||||||
'uploader_id': 'TaylorSwiftVEVO',
|
'uploader_id': 'TaylorSwiftVEVO',
|
||||||
'upload_date': '20140818',
|
'upload_date': '20140818',
|
||||||
'creator': 'Taylor Swift',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'youtube_include_dash_manifest': True,
|
'youtube_include_dash_manifest': True,
|
||||||
@ -755,11 +753,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'upload_date': '20100430',
|
'upload_date': '20100430',
|
||||||
'uploader_id': 'deadmau5',
|
'uploader_id': 'deadmau5',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
|
||||||
'creator': 'deadmau5',
|
'creator': 'Dada Life, deadmau5',
|
||||||
'description': 'md5:12c56784b8032162bb936a5f76d55360',
|
'description': 'md5:12c56784b8032162bb936a5f76d55360',
|
||||||
'uploader': 'deadmau5',
|
'uploader': 'deadmau5',
|
||||||
'title': 'Deadmau5 - Some Chords (HD)',
|
'title': 'Deadmau5 - Some Chords (HD)',
|
||||||
'alt_title': 'Some Chords',
|
'alt_title': 'This Machine Kills Some Chords',
|
||||||
},
|
},
|
||||||
'expected_warnings': [
|
'expected_warnings': [
|
||||||
'DASH manifest missing',
|
'DASH manifest missing',
|
||||||
@ -1135,6 +1133,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
'youtube_include_dash_manifest': False,
|
'youtube_include_dash_manifest': False,
|
||||||
},
|
},
|
||||||
|
'skip': 'not actual anymore',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Youtube Music Auto-generated description
|
# Youtube Music Auto-generated description
|
||||||
@ -1145,8 +1144,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'title': 'Voyeur Girl',
|
'title': 'Voyeur Girl',
|
||||||
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
|
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
|
||||||
'upload_date': '20190312',
|
'upload_date': '20190312',
|
||||||
'uploader': 'Various Artists - Topic',
|
'uploader': 'Stephen - Topic',
|
||||||
'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
|
'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
|
||||||
'artist': 'Stephen',
|
'artist': 'Stephen',
|
||||||
'track': 'Voyeur Girl',
|
'track': 'Voyeur Girl',
|
||||||
'album': 'it\'s too much love to know my dear',
|
'album': 'it\'s too much love to know my dear',
|
||||||
@ -1210,7 +1209,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': '-hcAI0g-f5M',
|
'id': '-hcAI0g-f5M',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Put It On Me',
|
'title': 'Put It On Me',
|
||||||
'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
|
'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
|
||||||
'upload_date': '20180426',
|
'upload_date': '20180426',
|
||||||
'uploader': 'Matt Maeson - Topic',
|
'uploader': 'Matt Maeson - Topic',
|
||||||
'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
|
'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
|
||||||
@ -1256,7 +1255,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
id_m = re.match(
|
id_m = re.match(
|
||||||
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
|
r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
|
||||||
player_url)
|
player_url)
|
||||||
if not id_m:
|
if not id_m:
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
@ -1708,9 +1707,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
def extract_view_count(v_info):
|
def extract_view_count(v_info):
|
||||||
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
||||||
|
|
||||||
def extract_token(v_info):
|
|
||||||
return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
|
|
||||||
|
|
||||||
def extract_player_response(player_response, video_id):
|
def extract_player_response(player_response, video_id):
|
||||||
pl_response = str_or_none(player_response)
|
pl_response = str_or_none(player_response)
|
||||||
if not pl_response:
|
if not pl_response:
|
||||||
@ -1723,6 +1719,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
player_response = {}
|
player_response = {}
|
||||||
|
|
||||||
# Get video info
|
# Get video info
|
||||||
|
video_info = {}
|
||||||
embed_webpage = None
|
embed_webpage = None
|
||||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||||
age_gate = True
|
age_gate = True
|
||||||
@ -1737,19 +1734,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
||||||
})
|
})
|
||||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||||
video_info_webpage = self._download_webpage(
|
try:
|
||||||
video_info_url, video_id,
|
video_info_webpage = self._download_webpage(
|
||||||
note='Refetching age-gated info webpage',
|
video_info_url, video_id,
|
||||||
errnote='unable to download video info webpage')
|
note='Refetching age-gated info webpage',
|
||||||
video_info = compat_parse_qs(video_info_webpage)
|
errnote='unable to download video info webpage')
|
||||||
pl_response = video_info.get('player_response', [None])[0]
|
except ExtractorError:
|
||||||
player_response = extract_player_response(pl_response, video_id)
|
video_info_webpage = None
|
||||||
add_dash_mpd(video_info)
|
if video_info_webpage:
|
||||||
view_count = extract_view_count(video_info)
|
video_info = compat_parse_qs(video_info_webpage)
|
||||||
|
pl_response = video_info.get('player_response', [None])[0]
|
||||||
|
player_response = extract_player_response(pl_response, video_id)
|
||||||
|
add_dash_mpd(video_info)
|
||||||
|
view_count = extract_view_count(video_info)
|
||||||
else:
|
else:
|
||||||
age_gate = False
|
age_gate = False
|
||||||
video_info = None
|
|
||||||
sts = None
|
|
||||||
# Try looking directly into the video webpage
|
# Try looking directly into the video webpage
|
||||||
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
|
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
|
||||||
if ytplayer_config:
|
if ytplayer_config:
|
||||||
@ -1766,61 +1765,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
|
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
|
||||||
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
||||||
is_live = True
|
is_live = True
|
||||||
sts = ytplayer_config.get('sts')
|
|
||||||
if not player_response:
|
if not player_response:
|
||||||
player_response = extract_player_response(args.get('player_response'), video_id)
|
player_response = extract_player_response(args.get('player_response'), video_id)
|
||||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
add_dash_mpd_pr(player_response)
|
add_dash_mpd_pr(player_response)
|
||||||
# We also try looking in get_video_info since it may contain different dashmpd
|
|
||||||
# URL that points to a DASH manifest with possibly different itag set (some itags
|
|
||||||
# are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
|
|
||||||
# manifest pointed by get_video_info's dashmpd).
|
|
||||||
# The general idea is to take a union of itags of both DASH manifests (for example
|
|
||||||
# video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
|
|
||||||
self.report_video_info_webpage_download(video_id)
|
|
||||||
for el in ('embedded', 'detailpage', 'vevo', ''):
|
|
||||||
query = {
|
|
||||||
'video_id': video_id,
|
|
||||||
'ps': 'default',
|
|
||||||
'eurl': '',
|
|
||||||
'gl': 'US',
|
|
||||||
'hl': 'en',
|
|
||||||
}
|
|
||||||
if el:
|
|
||||||
query['el'] = el
|
|
||||||
if sts:
|
|
||||||
query['sts'] = sts
|
|
||||||
video_info_webpage = self._download_webpage(
|
|
||||||
'%s://www.youtube.com/get_video_info' % proto,
|
|
||||||
video_id, note=False,
|
|
||||||
errnote='unable to download video info webpage',
|
|
||||||
fatal=False, query=query)
|
|
||||||
if not video_info_webpage:
|
|
||||||
continue
|
|
||||||
get_video_info = compat_parse_qs(video_info_webpage)
|
|
||||||
if not player_response:
|
|
||||||
pl_response = get_video_info.get('player_response', [None])[0]
|
|
||||||
player_response = extract_player_response(pl_response, video_id)
|
|
||||||
add_dash_mpd(get_video_info)
|
|
||||||
if view_count is None:
|
|
||||||
view_count = extract_view_count(get_video_info)
|
|
||||||
if not video_info:
|
|
||||||
video_info = get_video_info
|
|
||||||
get_token = extract_token(get_video_info)
|
|
||||||
if get_token:
|
|
||||||
# Different get_video_info requests may report different results, e.g.
|
|
||||||
# some may report video unavailability, but some may serve it without
|
|
||||||
# any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
|
|
||||||
# the original webpage as well as el=info and el=embedded get_video_info
|
|
||||||
# requests report video unavailability due to geo restriction while
|
|
||||||
# el=detailpage succeeds and returns valid data). This is probably
|
|
||||||
# due to YouTube measures against IP ranges of hosting providers.
|
|
||||||
# Working around by preferring the first succeeded video_info containing
|
|
||||||
# the token if no such video_info yet was found.
|
|
||||||
token = extract_token(video_info)
|
|
||||||
if not token:
|
|
||||||
video_info = get_video_info
|
|
||||||
break
|
|
||||||
|
|
||||||
def extract_unavailable_message():
|
def extract_unavailable_message():
|
||||||
messages = []
|
messages = []
|
||||||
@ -1833,13 +1781,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if messages:
|
if messages:
|
||||||
return '\n'.join(messages)
|
return '\n'.join(messages)
|
||||||
|
|
||||||
if not video_info:
|
if not video_info and not player_response:
|
||||||
unavailable_message = extract_unavailable_message()
|
unavailable_message = extract_unavailable_message()
|
||||||
if not unavailable_message:
|
if not unavailable_message:
|
||||||
unavailable_message = 'Unable to extract video data'
|
unavailable_message = 'Unable to extract video data'
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
|
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
|
||||||
|
|
||||||
|
if not isinstance(video_info, dict):
|
||||||
|
video_info = {}
|
||||||
|
|
||||||
video_details = try_get(
|
video_details = try_get(
|
||||||
player_response, lambda x: x['videoDetails'], dict) or {}
|
player_response, lambda x: x['videoDetails'], dict) or {}
|
||||||
|
|
||||||
@ -2035,7 +1986,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
else:
|
else:
|
||||||
player_version = self._search_regex(
|
player_version = self._search_regex(
|
||||||
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
||||||
r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
|
r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
|
||||||
player_url,
|
player_url,
|
||||||
'html5 player', fatal=False)
|
'html5 player', fatal=False)
|
||||||
player_desc = 'html5 player %s' % player_version
|
player_desc = 'html5 player %s' % player_version
|
||||||
@ -2392,30 +2343,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
f['stretched_ratio'] = ratio
|
f['stretched_ratio'] = ratio
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
token = extract_token(video_info)
|
if 'reason' in video_info:
|
||||||
if not token:
|
if 'The uploader has not made this video available in your country.' in video_info['reason']:
|
||||||
if 'reason' in video_info:
|
regions_allowed = self._html_search_meta(
|
||||||
if 'The uploader has not made this video available in your country.' in video_info['reason']:
|
'regionsAllowed', video_webpage, default=None)
|
||||||
regions_allowed = self._html_search_meta(
|
countries = regions_allowed.split(',') if regions_allowed else None
|
||||||
'regionsAllowed', video_webpage, default=None)
|
self.raise_geo_restricted(
|
||||||
countries = regions_allowed.split(',') if regions_allowed else None
|
msg=video_info['reason'][0], countries=countries)
|
||||||
self.raise_geo_restricted(
|
reason = video_info['reason'][0]
|
||||||
msg=video_info['reason'][0], countries=countries)
|
if 'Invalid parameters' in reason:
|
||||||
reason = video_info['reason'][0]
|
unavailable_message = extract_unavailable_message()
|
||||||
if 'Invalid parameters' in reason:
|
if unavailable_message:
|
||||||
unavailable_message = extract_unavailable_message()
|
reason = unavailable_message
|
||||||
if unavailable_message:
|
raise ExtractorError(
|
||||||
reason = unavailable_message
|
'YouTube said: %s' % reason,
|
||||||
raise ExtractorError(
|
expected=True, video_id=video_id)
|
||||||
'YouTube said: %s' % reason,
|
if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
|
||||||
expected=True, video_id=video_id)
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
else:
|
|
||||||
raise ExtractorError(
|
|
||||||
'"token" parameter not in video info for unknown reason',
|
|
||||||
video_id=video_id)
|
|
||||||
|
|
||||||
if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
|
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
@ -2495,20 +2439,23 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
|
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
|
||||||
IE_NAME = 'youtube:playlist'
|
IE_NAME = 'youtube:playlist'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'ytdl test PL',
|
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
||||||
'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
'uploader': 'Sergey M.',
|
||||||
|
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
||||||
|
'title': 'youtube-dl public playlist',
|
||||||
},
|
},
|
||||||
'playlist_count': 3,
|
'playlist_count': 1,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
||||||
'title': 'YDL_Empty_List',
|
'uploader': 'Sergey M.',
|
||||||
|
'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
|
||||||
|
'title': 'youtube-dl empty playlist',
|
||||||
},
|
},
|
||||||
'playlist_count': 0,
|
'playlist_count': 0,
|
||||||
'skip': 'This playlist is private',
|
|
||||||
}, {
|
}, {
|
||||||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||||
@ -2518,7 +2465,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
'uploader': 'Christiaan008',
|
'uploader': 'Christiaan008',
|
||||||
'uploader_id': 'ChRiStIaAn008',
|
'uploader_id': 'ChRiStIaAn008',
|
||||||
},
|
},
|
||||||
'playlist_count': 95,
|
'playlist_count': 96,
|
||||||
}, {
|
}, {
|
||||||
'note': 'issue #673',
|
'note': 'issue #673',
|
||||||
'url': 'PLBB231211A4F62143',
|
'url': 'PLBB231211A4F62143',
|
||||||
|
@ -29,7 +29,6 @@ class ZapiksIE(InfoExtractor):
|
|||||||
'timestamp': 1359044972,
|
'timestamp': 1359044972,
|
||||||
'upload_date': '20130124',
|
'upload_date': '20130124',
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
'comment_count': int,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -244,14 +244,14 @@ class ZDFChannelIE(ZDFBaseIE):
|
|||||||
'id': 'das-aktuelle-sportstudio',
|
'id': 'das-aktuelle-sportstudio',
|
||||||
'title': 'das aktuelle sportstudio | ZDF',
|
'title': 'das aktuelle sportstudio | ZDF',
|
||||||
},
|
},
|
||||||
'playlist_count': 21,
|
'playlist_mincount': 23,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/dokumentation/planet-e',
|
'url': 'https://www.zdf.de/dokumentation/planet-e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'planet-e',
|
'id': 'planet-e',
|
||||||
'title': 'planet e.',
|
'title': 'planet e.',
|
||||||
},
|
},
|
||||||
'playlist_count': 4,
|
'playlist_mincount': 50,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
'url': 'https://www.zdf.de/filme/taunuskrimi/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -134,7 +134,7 @@ def parseOpts(overrideArguments=None):
|
|||||||
action='help',
|
action='help',
|
||||||
help='Print this help text and exit')
|
help='Print this help text and exit')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
'-v', '--version',
|
'--version',
|
||||||
action='version',
|
action='version',
|
||||||
help='Print program version and exit')
|
help='Print program version and exit')
|
||||||
general.add_option(
|
general.add_option(
|
||||||
|
@ -9,6 +9,7 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
from zipimport import zipimporter
|
from zipimport import zipimporter
|
||||||
|
|
||||||
|
from .compat import compat_realpath
|
||||||
from .utils import encode_compat_str
|
from .utils import encode_compat_str
|
||||||
|
|
||||||
from .version import __version__
|
from .version import __version__
|
||||||
@ -84,7 +85,9 @@ def update_self(to_screen, verbose, opener):
|
|||||||
print_notes(to_screen, versions_info['versions'])
|
print_notes(to_screen, versions_info['versions'])
|
||||||
|
|
||||||
# sys.executable is set to the full pathname of the exe-file for py2exe
|
# sys.executable is set to the full pathname of the exe-file for py2exe
|
||||||
filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
|
# though symlinks are not followed so that we need to do this manually
|
||||||
|
# with help of realpath
|
||||||
|
filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0])
|
||||||
|
|
||||||
if not os.access(filename, os.W_OK):
|
if not os.access(filename, os.W_OK):
|
||||||
to_screen('ERROR: no write permissions on %s' % filename)
|
to_screen('ERROR: no write permissions on %s' % filename)
|
||||||
|
@ -2752,6 +2752,11 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
|
|||||||
for line in f:
|
for line in f:
|
||||||
if line.startswith(self._HTTPONLY_PREFIX):
|
if line.startswith(self._HTTPONLY_PREFIX):
|
||||||
line = line[len(self._HTTPONLY_PREFIX):]
|
line = line[len(self._HTTPONLY_PREFIX):]
|
||||||
|
# Cookie file may contain spaces instead of tabs.
|
||||||
|
# Replace all spaces with tabs to make such cookie files work
|
||||||
|
# with MozillaCookieJar.
|
||||||
|
if not line.startswith('#'):
|
||||||
|
line = re.sub(r' +', r'\t', line)
|
||||||
cf.write(compat_str(line))
|
cf.write(compat_str(line))
|
||||||
cf.seek(0)
|
cf.seek(0)
|
||||||
self._really_load(cf, filename, ignore_discard, ignore_expires)
|
self._really_load(cf, filename, ignore_discard, ignore_expires)
|
||||||
@ -2795,6 +2800,15 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
|
|||||||
https_response = http_response
|
https_response = http_response
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
|
||||||
|
if sys.version_info[0] < 3:
|
||||||
|
def redirect_request(self, req, fp, code, msg, headers, newurl):
|
||||||
|
# On python 2 urlh.geturl() may sometimes return redirect URL
|
||||||
|
# as byte string instead of unicode. This workaround allows
|
||||||
|
# to force it always return unicode.
|
||||||
|
return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
|
||||||
|
|
||||||
|
|
||||||
def extract_timezone(date_str):
|
def extract_timezone(date_str):
|
||||||
m = re.search(
|
m = re.search(
|
||||||
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2020.01.24'
|
__version__ = '2020.03.08'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user