Merge branch 'master' into jsinterp

# Conflicts:
#	youtube_dl/extractor/openload.py
This commit is contained in:
sulyi 2016-12-09 23:54:33 +01:00
commit c5c1273ba5
38 changed files with 681 additions and 396 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.18*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.18** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.09**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.11.18 [debug] youtube-dl version 2016.12.09
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

1
.gitignore vendored
View File

@ -33,6 +33,7 @@ updates_key.pem
*.wav *.wav
*.ape *.ape
*.mkv *.mkv
*.swf
*.part *.part
*.swp *.swp
test/testdata test/testdata

View File

@ -1,8 +1,60 @@
version <unreleased> version 2016.12.09
Core
* [socks] Fix error reporting (#11355)
Extractors Extractors
- [Crunchyroll] ScaledBorderAndShadow are removed from ASS subtitles * [openload] Fix extraction (#10408)
(#8207, #9028) * [pandoratv] Fix extraction (#11023)
+ [telebruxelles] Add support for emission URLs
* [telebruxelles] Extract all formats
+ [bloomberg] Add another video id regular expression (#11371)
* [fusion] Update ooyala id regular expression (#11364)
+ [1tv] Add support for playlists (#11335)
* [1tv] Improve extraction (#11335)
+ [aenetworks] Extract more formats (#11321)
+ [thisoldhouse] Recognize /tv-episode/ URLs (#11271)
version 2016.12.01
Extractors
* [soundcloud] Update client id (#11327)
* [ruutu] Detect DRM protected videos
+ [liveleak] Add support for youtube embeds (#10688)
* [spike] Fix full episodes support (#11312)
* [comedycentral] Fix full episodes support
* [normalboots] Rewrite in terms of JWPlatform (#11184)
* [teamfourstar] Rewrite in terms of JWPlatform (#11184)
- [screenwavemedia] Remove extractor (#11184)
version 2016.11.27
Extractors
+ [webcaster] Add support for webcaster.pro
+ [azubu] Add support for azubu.uol.com.br (#11305)
* [viki] Prefer hls formats
* [viki] Fix rtmp formats extraction (#11255)
* [puls4] Relax URL regular expression (#11267)
* [vevo] Improve artist extraction (#10911)
* [mitele] Relax URL regular expression and extract more metadata (#11244)
+ [cbslocal] Recognize New York site (#11285)
+ [youtube:playlist] Pass disable_polymer in URL query (#11193)
version 2016.11.22
Extractors
* [hellporno] Fix video extension extraction (#11247)
+ [hellporno] Add support for hellporno.net (#11247)
+ [amcnetworks] Recognize more BBC America URLs (#11263)
* [funnyordie] Improve extraction (#11208)
* [extractor/generic] Improve limelight embeds support
- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028)
* [bandcamp] Fix free downloads extraction and extract all formats (#11067)
* [twitter:card] Relax URL regular expression (#11225)
+ [tvanouvelles] Add support for tvanouvelles.ca (#10616)
version 2016.11.18 version 2016.11.18

View File

@ -1,7 +1,7 @@
all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
clean: clean:
rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe
find . -name "*.pyc" -delete find . -name "*.pyc" -delete
find . -name "*.class" -delete find . -name "*.class" -delete

View File

@ -664,7 +664,7 @@ $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
# Download best format available but not better that 480p # Download best format available but not better that 480p
$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]' $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
# Download best video only format but no bigger that 50 MB # Download best video only format but no bigger than 50 MB
$ youtube-dl -f 'best[filesize<50M]' $ youtube-dl -f 'best[filesize<50M]'
# Download best format available via direct link over HTTP/HTTPS protocol # Download best format available via direct link over HTTP/HTTPS protocol

View File

@ -158,6 +158,7 @@
- **CollegeRama** - **CollegeRama**
- **ComCarCoff** - **ComCarCoff**
- **ComedyCentral** - **ComedyCentral**
- **ComedyCentralFullEpisodes**
- **ComedyCentralShortname** - **ComedyCentralShortname**
- **ComedyCentralTV** - **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
@ -643,7 +644,6 @@
- **Screencast** - **Screencast**
- **ScreencastOMatic** - **ScreencastOMatic**
- **ScreenJunkies** - **ScreenJunkies**
- **ScreenwaveMedia**
- **Seeker** - **Seeker**
- **SenateISVP** - **SenateISVP**
- **SendtoNews** - **SendtoNews**
@ -715,7 +715,7 @@
- **teachertube:user:collection**: teachertube.com user and collection videos - **teachertube:user:collection**: teachertube.com user and collection videos
- **TeachingChannel** - **TeachingChannel**
- **Teamcoco** - **Teamcoco**
- **TeamFour** - **TeamFourStar**
- **TechTalks** - **TechTalks**
- **techtv.mit.edu** - **techtv.mit.edu**
- **ted** - **ted**
@ -771,6 +771,8 @@
- **TV2Article** - **TV2Article**
- **TV3** - **TV3**
- **TV4**: tv4.se and tv4play.se - **TV4**: tv4.se and tv4play.se
- **TVANouvelles**
- **TVANouvellesArticle**
- **TVC** - **TVC**
- **TVCArticle** - **TVCArticle**
- **tvigle**: Интернет-телевидение Tvigle.ru - **tvigle**: Интернет-телевидение Tvigle.ru
@ -880,6 +882,8 @@
- **WatchIndianPorn**: Watch Indian Porn - **WatchIndianPorn**: Watch Indian Porn
- **WDR** - **WDR**
- **wdr:mobile** - **wdr:mobile**
- **Webcaster**
- **WebcasterFeed**
- **WebOfStories** - **WebOfStories**
- **WebOfStoriesPlaylist** - **WebOfStoriesPlaylist**
- **WeiqiTV**: WQTV - **WeiqiTV**: WQTV

View File

@ -26,7 +26,7 @@ class AENetworksIE(AENetworksBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)' _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6',
'info_dict': { 'info_dict': {
'id': '22253814', 'id': '22253814',
'ext': 'mp4', 'ext': 'mp4',
@ -99,7 +99,7 @@ class AENetworksIE(AENetworksBaseIE):
query = { query = {
'mbr': 'true', 'mbr': 'true',
'assetTypes': 'medium_video_s3' 'assetTypes': 'high_video_s3'
} }
video_id = self._html_search_meta('aetn:VideoID', webpage) video_id = self._html_search_meta('aetn:VideoID', webpage)
media_url = self._search_regex( media_url = self._search_regex(
@ -155,7 +155,7 @@ class HistoryTopicIE(AENetworksBaseIE):
'id': 'world-war-i-history', 'id': 'world-war-i-history',
'title': 'World War I History', 'title': 'World War I History',
}, },
'playlist_mincount': 24, 'playlist_mincount': 23,
}, { }, {
'url': 'http://www.history.com/topics/world-war-i-history/videos', 'url': 'http://www.history.com/topics/world-war-i-history/videos',
'only_matching': True, 'only_matching': True,
@ -193,7 +193,8 @@ class HistoryTopicIE(AENetworksBaseIE):
return self.theplatform_url_result( return self.theplatform_url_result(
release_url, video_id, { release_url, video_id, {
'mbr': 'true', 'mbr': 'true',
'switch': 'hls' 'switch': 'hls',
'assetTypes': 'high_video_ak',
}) })
else: else:
webpage = self._download_webpage(url, topic_id) webpage = self._download_webpage(url, topic_id)
@ -203,6 +204,7 @@ class HistoryTopicIE(AENetworksBaseIE):
entries.append(self.theplatform_url_result( entries.append(self.theplatform_url_result(
video_attributes['data-release-url'], video_attributes['data-id'], { video_attributes['data-release-url'], video_attributes['data-id'], {
'mbr': 'true', 'mbr': 'true',
'switch': 'hls' 'switch': 'hls',
'assetTypes': 'high_video_ak',
})) }))
return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage)) return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage))

View File

@ -10,7 +10,7 @@ from ..utils import (
class AMCNetworksIE(ThePlatformIE): class AMCNetworksIE(ThePlatformIE):
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
'md5': '', 'md5': '',
@ -41,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE):
}, { }, {
'url': 'http://www.ifc.com/movies/chaos', 'url': 'http://www.ifc.com/movies/chaos',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -11,7 +11,7 @@ from ..utils import (
class AzubuIE(InfoExtractor): class AzubuIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P<id>\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1', 'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1',
@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor):
class AzubuLiveIE(InfoExtractor): class AzubuLiveIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P<id>[^/]+)$' _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P<id>[^/]+)$'
_TEST = { _TESTS = [{
'url': 'http://www.azubu.tv/MarsTVMDLen', 'url': 'http://www.azubu.tv/MarsTVMDLen',
'only_matching': True, 'only_matching': True,
} }, {
'url': 'http://azubu.uol.com.br/adolfz',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
user = self._match_id(url) user = self._match_id(url)

View File

@ -45,7 +45,8 @@ class BloombergIE(InfoExtractor):
name = self._match_id(url) name = self._match_id(url)
webpage = self._download_webpage(url, name) webpage = self._download_webpage(url, name)
video_id = self._search_regex( video_id = self._search_regex(
r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>.+?)\1', (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
r'videoId\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
webpage, 'id', group='url', default=None) webpage, 'id', group='url', default=None)
if not video_id: if not video_id:
bplayer_data = self._parse_json(self._search_regex( bplayer_data = self._parse_json(self._search_regex(

View File

@ -4,11 +4,14 @@ from __future__ import unicode_literals
from .anvato import AnvatoIE from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..utils import unified_timestamp from ..utils import (
parse_iso8601,
unified_timestamp,
)
class CBSLocalIE(AnvatoIE): class CBSLocalIE(AnvatoIE):
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)' _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
_TESTS = [{ _TESTS = [{
# Anvato backend # Anvato backend
@ -49,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
'info_dict': {
'id': '3580809',
'ext': 'mp4',
'title': 'A Very Blue Anniversary',
'description': 'CBS2s Cindy Hsu has more.',
'thumbnail': 're:^https?://.*',
'timestamp': 1479962220,
'upload_date': '20161124',
'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
'categories': [
'Stations\\Spoken Word\\WCBSTV',
'Syndication\\AOL',
'Syndication\\MSN',
'Syndication\\NDN',
'Syndication\\Yahoo',
'Content\\News',
'Content\\News\\Local News',
],
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -64,8 +92,11 @@ class CBSLocalIE(AnvatoIE):
info_dict = self._extract_anvato_videos(webpage, display_id) info_dict = self._extract_anvato_videos(webpage, display_id)
time_str = self._html_search_regex( time_str = self._html_search_regex(
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
if time_str:
timestamp = unified_timestamp(time_str) timestamp = unified_timestamp(time_str)
else:
timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
info_dict.update({ info_dict.update({
'display_id': display_id, 'display_id': display_id,

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class ComedyCentralIE(MTVServicesInfoExtractor): class ComedyCentralIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(video-clips|episodes|cc-studios|video-collections|full-episodes|shows) (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
/(?P<title>.*)''' /(?P<title>.*)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/' _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@ -27,6 +27,40 @@ class ComedyCentralIE(MTVServicesInfoExtractor):
}] }]
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
(?:full-episodes|shows(?=/[^/]+/full-episodes))
/(?P<id>[^?]+)'''
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
_TESTS = [{
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
'info_dict': {
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
'title': 'November 28, 2016 - Ryan Speedo Green',
},
'playlist_count': 4,
}, {
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed')
feed = self._parse_json(feed_json, playlist_id)
zones = feed['manifest']['zones']
video_zone = zones['t2_lc_promo1']
feed = self._download_json(video_zone['feed'], playlist_id)
mgid = feed['result']['data']['id']
videos_info = self._get_videos_info(mgid)
return videos_info
class ToshIE(MTVServicesInfoExtractor): class ToshIE(MTVServicesInfoExtractor):
IE_DESC = 'Tosh.0' IE_DESC = 'Tosh.0'
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)' _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'

View File

@ -180,6 +180,7 @@ from .cnn import (
from .coub import CoubIE from .coub import CoubIE
from .collegerama import CollegeRamaIE from .collegerama import CollegeRamaIE
from .comedycentral import ( from .comedycentral import (
ComedyCentralFullEpisodesIE,
ComedyCentralIE, ComedyCentralIE,
ComedyCentralShortnameIE, ComedyCentralShortnameIE,
ComedyCentralTVIE, ComedyCentralTVIE,
@ -804,7 +805,6 @@ from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screencastomatic import ScreencastOMaticIE from .screencastomatic import ScreencastOMaticIE
from .screenjunkies import ScreenJunkiesIE from .screenjunkies import ScreenJunkiesIE
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
from .seeker import SeekerIE from .seeker import SeekerIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE from .sendtonews import SendtoNewsIE
@ -897,6 +897,7 @@ from .teachertube import (
) )
from .teachingchannel import TeachingChannelIE from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE from .teamcoco import TeamcocoIE
from .teamfourstar import TeamFourStarIE
from .techtalks import TechTalksIE from .techtalks import TechTalksIE
from .ted import TEDIE from .ted import TEDIE
from .tele13 import Tele13IE from .tele13 import Tele13IE
@ -1121,6 +1122,10 @@ from .wdr import (
WDRIE, WDRIE,
WDRMobileIE, WDRMobileIE,
) )
from .webcaster import (
WebcasterIE,
WebcasterFeedIE,
)
from .webofstories import ( from .webofstories import (
WebOfStoriesIE, WebOfStoriesIE,
WebOfStoriesPlaylistIE, WebOfStoriesPlaylistIE,

View File

@ -2,7 +2,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
qualities, qualities,
@ -22,8 +25,7 @@ class FirstTVIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '40049', 'id': '40049',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015',
'description': 'md5:36a39c1d19618fec57d12efe212a8370',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'upload_date': '20150212', 'upload_date': '20150212',
'duration': 2694, 'duration': 2694,
@ -34,8 +36,7 @@ class FirstTVIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '364746', 'id': '364746',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016',
'description': 'md5:a242eea0031fd180a4497d52640a9572',
'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$',
'upload_date': '20160407', 'upload_date': '20160407',
'duration': 179, 'duration': 179,
@ -44,6 +45,17 @@ class FirstTVIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00',
'info_dict': {
'id': '14:00',
'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. Первый канал',
'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd',
},
'playlist_count': 13,
}, {
'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -51,43 +63,66 @@ class FirstTVIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
playlist_url = compat_urlparse.urljoin(url, self._search_regex( playlist_url = compat_urlparse.urljoin(url, self._search_regex(
r'data-playlist-url="([^"]+)', webpage, 'playlist url')) r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'playlist url', group='url'))
item = self._download_json(playlist_url, display_id)[0] parsed_url = compat_urlparse.urlparse(playlist_url)
video_id = item['id'] qs = compat_urlparse.parse_qs(parsed_url.query)
quality = qualities(('ld', 'sd', 'hd', )) item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]')
items = self._download_json(playlist_url, display_id)
if item_ids:
items = [
item for item in items
if item.get('uid') and compat_str(item['uid']) in item_ids]
else:
items = [items[0]]
entries = []
QUALITIES = ('ld', 'sd', 'hd', )
for item in items:
title = item['title']
quality = qualities(QUALITIES)
formats = [] formats = []
for f in item.get('mbr', []): for f in item.get('mbr', []):
src = f.get('src') src = f.get('src')
if not src: if not src or not isinstance(src, compat_str):
continue continue
fname = f.get('name') tbr = int_or_none(self._search_regex(
r'_(\d{3,})\.mp4', src, 'tbr', default=None))
formats.append({ formats.append({
'url': src, 'url': src,
'format_id': fname, 'format_id': f.get('name'),
'quality': quality(fname), 'tbr': tbr,
'quality': quality(f.get('name')),
}) })
self._sort_formats(formats) self._sort_formats(formats)
thumbnail = item.get('poster') or self._og_search_thumbnail(webpage)
duration = int_or_none(item.get('duration') or self._html_search_meta(
'video:duration', webpage, 'video duration', fatal=False))
upload_date = unified_strdate(self._html_search_meta(
'ya:ovs:upload_date', webpage, 'upload date', default=None))
entries.append({
'id': compat_str(item.get('id') or item['uid']),
'thumbnail': thumbnail,
'title': title,
'upload_date': upload_date,
'duration': int_or_none(duration),
'formats': formats
})
title = self._html_search_regex( title = self._html_search_regex(
(r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>',
r"'title'\s*:\s*'([^']+)'"), r"'title'\s*:\s*'([^']+)'"),
webpage, 'title', default=None) or item['title'] webpage, 'title', default=None) or self._og_search_title(
webpage, default=None)
description = self._html_search_regex( description = self._html_search_regex(
r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>', r'<div class="descr">\s*<div>&nbsp;</div>\s*<p>([^<]*)</p></div>',
webpage, 'description', default=None) or self._html_search_meta( webpage, 'description', default=None) or self._html_search_meta(
'description', webpage, 'description') 'description', webpage, 'description', default=None)
duration = int_or_none(self._html_search_meta(
'video:duration', webpage, 'video duration', fatal=False))
upload_date = unified_strdate(self._html_search_meta(
'ya:ovs:upload_date', webpage, 'upload date', fatal=False))
return { return self.playlist_result(entries, display_id, title, description)
'id': video_id,
'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage),
'title': title,
'description': description,
'upload_date': upload_date,
'duration': int_or_none(duration),
'formats': formats
}

View File

@ -28,6 +28,9 @@ class FunnyOrDieIE(InfoExtractor):
'description': 'Please use this to sell something. www.jonlajoie.com', 'description': 'Please use this to sell something. www.jonlajoie.com',
'thumbnail': 're:^http:.*\.jpg$', 'thumbnail': 're:^http:.*\.jpg$',
}, },
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man',
'only_matching': True, 'only_matching': True,
@ -51,19 +54,45 @@ class FunnyOrDieIE(InfoExtractor):
formats = [] formats = []
formats.extend(self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False)
source_formats = list(filter(
lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
m3u8_formats))
bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)] bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)]
bitrates.sort() bitrates.sort()
for bitrate in bitrates: if source_formats:
for link in links: self._sort_formats(source_formats)
formats.append({
'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])), for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)):
'format_id': '%s-%d' % (link[1], bitrate), for path, ext in links:
ff = f.copy()
if ff:
if ext != 'mp4':
ff = dict(
[(k, v) for k, v in ff.items()
if k in ('height', 'width', 'format_id')])
ff.update({
'format_id': ff['format_id'].replace('hls', ext),
'ext': ext,
'protocol': 'http',
})
else:
ff.update({
'format_id': '%s-%d' % (ext, bitrate),
'vbr': bitrate, 'vbr': bitrate,
}) })
ff['url'] = self._proto_relative_url(
'%s%d.%s' % (path, bitrate, ext))
formats.append(ff)
self._check_formats(formats, video_id)
formats.extend(m3u8_formats)
self._sort_formats(
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
subtitles = {} subtitles = {}
for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage): for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage):

View File

@ -29,7 +29,7 @@ class FusionIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
ooyala_code = self._search_regex( ooyala_code = self._search_regex(
r'data-video-id=(["\'])(?P<code>.+?)\1', r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1',
webpage, 'ooyala code', group='code') webpage, 'ooyala code', group='code')
return OoyalaIE._build_url_result(ooyala_code) return OoyalaIE._build_url_result(ooyala_code)

View File

@ -56,10 +56,10 @@ from .dailymotion import (
) )
from .onionstudios import OnionStudiosIE from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE from .pladform import PladformIE
from .videomore import VideomoreIE from .videomore import VideomoreIE
from .webcaster import WebcasterFeedIE
from .googledrive import GoogleDriveIE from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
@ -1189,16 +1189,6 @@ class GenericIE(InfoExtractor):
'duration': 248.667, 'duration': 248.667,
}, },
}, },
# ScreenwaveMedia embed
{
'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1',
'md5': '24ace5baba0d35d55c6810b51f34e9e0',
'info_dict': {
'id': 'cinemasnob-55d26273809dd',
'ext': 'mp4',
'title': 'cinemasnob',
},
},
# BrightcoveInPageEmbed embed # BrightcoveInPageEmbed embed
{ {
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
@ -2140,6 +2130,11 @@ class GenericIE(InfoExtractor):
if videomore_url: if videomore_url:
return self.url_result(videomore_url) return self.url_result(videomore_url)
# Look for Webcaster embeds
webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
if webcaster_url:
return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
# Look for Playwire embeds # Look for Playwire embeds
mobj = re.search( mobj = re.search(
r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
@ -2206,11 +2201,6 @@ class GenericIE(InfoExtractor):
if jwplatform_url: if jwplatform_url:
return self.url_result(jwplatform_url, 'JWPlatform') return self.url_result(jwplatform_url, 'JWPlatform')
# Look for ScreenwaveMedia embeds
mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage)
if mobj is not None:
return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia')
# Look for Digiteka embeds # Look for Digiteka embeds
digiteka_url = DigitekaIE._extract_url(webpage) digiteka_url = DigitekaIE._extract_url(webpage)
if digiteka_url: if digiteka_url:

View File

@ -6,12 +6,13 @@ from .common import InfoExtractor
from ..utils import ( from ..utils import (
js_to_json, js_to_json,
remove_end, remove_end,
determine_ext,
) )
class HellPornoIE(InfoExtractor): class HellPornoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
_TEST = { _TESTS = [{
'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/', 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
'md5': '1fee339c610d2049699ef2aa699439f1', 'md5': '1fee339c610d2049699ef2aa699439f1',
'info_dict': { 'info_dict': {
@ -22,7 +23,10 @@ class HellPornoIE(InfoExtractor):
'thumbnail': 're:https?://.*\.jpg$', 'thumbnail': 're:https?://.*\.jpg$',
'age_limit': 18, 'age_limit': 18,
} }
} }, {
'url': 'http://hellporno.net/v/186271/',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@ -38,7 +42,7 @@ class HellPornoIE(InfoExtractor):
video_id = flashvars.get('video_id') video_id = flashvars.get('video_id')
thumbnail = flashvars.get('preview_url') thumbnail = flashvars.get('preview_url')
ext = flashvars.get('postfix', '.mp4')[1:] ext = determine_ext(flashvars.get('postfix'), 'mp4')
formats = [] formats = []
for video_url_key in ['video_url', 'video_alt_url']: for video_url_key in ['video_url', 'video_alt_url']:

View File

@ -54,6 +54,22 @@ class LiveLeakIE(InfoExtractor):
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia', 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
'thumbnail': 're:^https?://.*\.jpg$' 'thumbnail': 're:^https?://.*\.jpg$'
} }
}, {
# Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521
'url': 'http://m.liveleak.com/view?i=763_1473349649',
'add_ie': ['Youtube'],
'info_dict': {
'id': '763_1473349649',
'ext': 'mp4',
'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
'uploader': 'Ziz',
'upload_date': '20160908',
'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
},
'params': {
'skip_download': True,
},
}] }]
@staticmethod @staticmethod
@ -87,7 +103,7 @@ class LiveLeakIE(InfoExtractor):
else: else:
# Maybe an embed? # Maybe an embed?
embed_url = self._search_regex( embed_url = self._search_regex(
r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"', r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
webpage, 'embed URL') webpage, 'embed URL')
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
@ -107,6 +123,7 @@ class LiveLeakIE(InfoExtractor):
'format_note': s.get('label'), 'format_note': s.get('label'),
'url': s['file'], 'url': s['file'],
} for i, s in enumerate(sources)] } for i, s in enumerate(sources)]
for i, s in enumerate(sources): for i, s in enumerate(sources):
# Removing '.h264_*.mp4' gives the raw video, which is essentially # Removing '.h264_*.mp4' gives the raw video, which is essentially
# the same video without the LiveLeak logo at the top (see # the same video without the LiveLeak logo at the top (see

View File

@ -75,7 +75,7 @@ class MiTeleBaseIE(InfoExtractor):
class MiTeleIE(InfoExtractor): class MiTeleIE(InfoExtractor):
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
_TESTS = [{ _TESTS = [{
'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
@ -86,7 +86,10 @@ class MiTeleIE(InfoExtractor):
'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
'series': 'Diario de', 'series': 'Diario de',
'season': 'La redacción', 'season': 'La redacción',
'season_number': 14,
'season_id': 'diario_de_t14_11981',
'episode': 'Programa 144', 'episode': 'Programa 144',
'episode_number': 3,
'thumbnail': 're:(?i)^https?://.*\.jpg$', 'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 2913, 'duration': 2913,
}, },
@ -101,7 +104,10 @@ class MiTeleIE(InfoExtractor):
'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
'series': 'Cuarto Milenio', 'series': 'Cuarto Milenio',
'season': 'Temporada 6', 'season': 'Temporada 6',
'season_number': 6,
'season_id': 'cuarto_milenio_t06_12715',
'episode': 'Programa 226', 'episode': 'Programa 226',
'episode_number': 24,
'thumbnail': 're:(?i)^https?://.*\.jpg$', 'thumbnail': 're:(?i)^https?://.*\.jpg$',
'duration': 7313, 'duration': 7313,
}, },
@ -109,41 +115,77 @@ class MiTeleIE(InfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['Ooyala'], 'add_ie': ['Ooyala'],
}, {
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None) gigya_url = self._search_regex(
gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script') r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>',
webpage, 'gigya', default=None)
gigya_sc = self._download_webpage(
compat_urlparse.urljoin('http://www.mitele.es/', gigya_url),
video_id, 'Downloading gigya script')
# Get a appKey/uuid for getting the session key # Get a appKey/uuid for getting the session key
appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable') appKey_var = self._search_regex(
appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey') r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)',
uid = compat_str(uuid.uuid4()) gigya_sc, 'appKey variable')
session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid) appKey = self._search_regex(
session_json = self._download_json(session_url, video_id, 'Downloading session keys') r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey')
sessionKey = compat_str(session_json['sessionKey'])
session_json = self._download_json(
'https://appgrid-api.cloud.accedo.tv/session',
video_id, 'Downloading session keys', query={
'appKey': appKey,
'uuid': compat_str(uuid.uuid4()),
})
paths = self._download_json(
'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration',
video_id, 'Downloading paths JSON',
query={'sessionKey': compat_str(session_json['sessionKey'])})
paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey
paths = self._download_json(paths_url, video_id, 'Downloading paths JSON')
ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
data_p = ( source = self._download_json(
'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] + 'http://%s%s%s/docs/%s' % (
'/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full') ooyala_s['base_url'], ooyala_s['full_path'],
data = self._download_json(data_p, video_id, 'Downloading data JSON') ooyala_s['provider_id'], video_id),
source = data['hits']['hits'][0]['_source'] video_id, 'Downloading data JSON', query={
embedCode = source['offers'][0]['embed_codes'][0] 'include_titles': 'Series,Season',
'product_name': 'test',
'format': 'full',
})['hits']['hits'][0]['_source']
embedCode = source['offers'][0]['embed_codes'][0]
titles = source['localizable_titles'][0] titles = source['localizable_titles'][0]
title = titles.get('title_medium') or titles['title_long'] title = titles.get('title_medium') or titles['title_long']
episode = titles['title_sort_name']
description = titles['summary_long'] description = titles.get('summary_long') or titles.get('summary_medium')
titles_series = source['localizable_titles_series'][0]
series = titles_series['title_long'] def get(key1, key2):
titles_season = source['localizable_titles_season'][0] value1 = source.get(key1)
season = titles_season['title_medium'] if not value1 or not isinstance(value1, list):
duration = parse_duration(source['videos'][0]['duration']) return
if not isinstance(value1[0], dict):
return
return value1[0].get(key2)
series = get('localizable_titles_series', 'title_medium')
season = get('localizable_titles_season', 'title_medium')
season_number = int_or_none(source.get('season_number'))
season_id = source.get('season_id')
episode = titles.get('title_sort_name')
episode_number = int_or_none(source.get('episode_number'))
duration = parse_duration(get('videos', 'duration'))
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
@ -154,7 +196,10 @@ class MiTeleIE(InfoExtractor):
'description': description, 'description': description,
'series': series, 'series': series,
'season': season, 'season': season,
'season_number': season_number,
'season_id': season_id,
'episode': episode, 'episode': episode,
'episode_number': episode_number,
'duration': duration, 'duration': duration,
'thumbnail': source['images'][0]['url'], 'thumbnail': get('images', 'url'),
} }

View File

@ -13,6 +13,7 @@ from ..utils import (
fix_xml_ampersands, fix_xml_ampersands,
float_or_none, float_or_none,
HEADRequest, HEADRequest,
NO_DEFAULT,
RegexNotFoundError, RegexNotFoundError,
sanitized_Request, sanitized_Request,
strip_or_none, strip_or_none,
@ -201,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
[self._get_video_info(item) for item in idoc.findall('.//item')], [self._get_video_info(item) for item in idoc.findall('.//item')],
playlist_title=title, playlist_description=description) playlist_title=title, playlist_description=description)
def _extract_mgid(self, webpage): def _extract_mgid(self, webpage, default=NO_DEFAULT):
try: try:
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
# or http://media.mtvnservices.com/{mgid} # or http://media.mtvnservices.com/{mgid}
@ -221,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
sm4_embed = self._html_search_meta( sm4_embed = self._html_search_meta(
'sm4:video:embed', webpage, 'sm4 embed', default='') 'sm4:video:embed', webpage, 'sm4 embed', default='')
mgid = self._search_regex( mgid = self._search_regex(
r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default)
return mgid return mgid
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -2,7 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from .screenwavemedia import ScreenwaveMediaIE from .jwplatform import JWPlatformIE
from ..utils import ( from ..utils import (
unified_strdate, unified_strdate,
@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['ScreenwaveMedia'], 'add_ie': ['JWPlatform'],
} }
def _real_extract(self, url): def _real_extract(self, url):
@ -39,15 +39,13 @@ class NormalbootsIE(InfoExtractor):
r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>',
webpage, 'date', fatal=False)) webpage, 'date', fatal=False))
screenwavemedia_url = self._html_search_regex( jwplatform_url = JWPlatformIE._extract_url(webpage)
ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL',
group='url')
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'id': video_id, 'id': video_id,
'url': screenwavemedia_url, 'url': jwplatform_url,
'ie_key': ScreenwaveMediaIE.ie_key(), 'ie_key': JWPlatformIE.ie_key(),
'title': self._og_search_title(webpage), 'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage), 'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage), 'thumbnail': self._og_search_thumbnail(webpage),

View File

@ -1,20 +1,12 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals, division from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_chr
compat_chr,
compat_ord,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError, ExtractorError,
) )
from ..jsinterp import JSInterpreter
from ..jsinterp.jsgrammar import _NAME_RE
class OpenloadIE(InfoExtractor): class OpenloadIE(InfoExtractor):
@ -61,44 +53,6 @@ class OpenloadIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
def openload_decode(self, txt):
symbol_dict = {
'(゚Д゚) [゚Θ゚]': '_',
'(゚Д゚) [゚ω゚ノ]': 'a',
'(゚Д゚) [゚Θ゚ノ]': 'b',
'(゚Д゚) [\'c\']': 'c',
'(゚Д゚) [゚ー゚ノ]': 'd',
'(゚Д゚) [゚Д゚ノ]': 'e',
'(゚Д゚) [1]': 'f',
'(゚Д゚) [\'o\']': 'o',
'(o゚ー゚o)': 'u',
'(゚Д゚) [\'c\']': 'c',
'((゚ー゚) + (o^_^o))': '7',
'((o^_^o) +(o^_^o) +(c^_^o))': '6',
'((゚ー゚) + (゚Θ゚))': '5',
'(-~3)': '4',
'(-~-~1)': '3',
'(-~1)': '2',
'(-~0)': '1',
'((c^_^o)-(c^_^o))': '0',
}
delim = '(゚Д゚)[゚ε゚]+'
end_token = '(゚Д゚)[゚o゚]'
symbols = '|'.join(map(re.escape, symbol_dict.keys()))
txt = re.sub('(%s)\+\s?' % symbols, lambda m: symbol_dict[m.group(1)], txt)
ret = ''
for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt):
for aachar in aacode.split(delim):
if aachar.isdigit():
ret += compat_chr(int(aachar, 8))
else:
m = re.match(r'^u([\da-f]{4})$', aachar)
if m:
ret += compat_chr(int(m.group(1), 16))
else:
self.report_warning("Cannot decode: %s" % aachar)
return ret
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id)
@ -106,36 +60,20 @@ class OpenloadIE(InfoExtractor):
if 'File not found' in webpage or 'deleted by the owner' in webpage: if 'File not found' in webpage or 'deleted by the owner' in webpage:
raise ExtractorError('File not found', expected=True) raise ExtractorError('File not found', expected=True)
# The following decryption algorithm is written by @yokrysty and ol_id = self._search_regex(
# declared to be freely used in youtube-dl '<span[^>]+id="[a-zA-Z0-9]+x"[^>]*>([0-9]+)</span>',
# See https://github.com/rg3/youtube-dl/issues/10408 webpage, 'openload ID')
enc_data = self._html_search_regex(
r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"',
webpage, 'encrypted data')
enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>', first_two_chars = int(float(ol_id[0:][:2]))
webpage, 'encrypted code') urlcode = ''
num = 2
js_code = self.openload_decode(enc_code) while num < len(ol_id):
jsi = JSInterpreter(js_code) urlcode += compat_chr(int(float(ol_id[num:][:3])) -
first_two_chars * int(float(ol_id[num + 3:][:2])))
num += 5
m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function') video_url = 'https://openload.co/stream/' + urlcode
m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function')
offset = jsi.call_function(m_offset_fun)
diff = jsi.call_function(m_diff_fun)
video_url_chars = []
for idx, c in enumerate(enc_data):
j = compat_ord(c)
if j >= 33 and j <= 126:
j = ((j + 14) % 94) + 33
if idx == len(enc_data) - offset:
j += diff
video_url_chars += compat_chr(j)
video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars)
title = self._og_search_title(webpage, default=None) or self._search_regex( title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@ -154,5 +92,4 @@ class OpenloadIE(InfoExtractor):
'ext': determine_ext(title), 'ext': determine_ext(title),
'subtitles': subtitles, 'subtitles': subtitles,
} }
return info_dict return info_dict

View File

@ -11,6 +11,7 @@ from ..utils import (
float_or_none, float_or_none,
parse_duration, parse_duration,
str_to_int, str_to_int,
urlencode_postdata,
) )
@ -56,6 +57,22 @@ class PandoraTVIE(InfoExtractor):
r'^v(\d+)[Uu]rl$', format_id, 'height', default=None) r'^v(\d+)[Uu]rl$', format_id, 'height', default=None)
if not height: if not height:
continue continue
play_url = self._download_json(
'http://m.pandora.tv/?c=api&m=play_url', video_id,
data=urlencode_postdata({
'prgid': video_id,
'runtime': info.get('runtime'),
'vod_url': format_url,
}),
headers={
'Origin': url,
'Content-Type': 'application/x-www-form-urlencoded',
})
format_url = play_url.get('url')
if not format_url:
continue
formats.append({ formats.append({
'format_id': '%sp' % height, 'format_id': '%sp' % height,
'url': format_url, 'url': format_url,

View File

@ -10,7 +10,7 @@ from ..utils import (
class Puls4IE(ProSiebenSat1BaseIE): class Puls4IE(ProSiebenSat1BaseIE):
_VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>(?:[^/]+/)*?videos/[^?#]+)' _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
@ -22,6 +22,12 @@ class Puls4IE(ProSiebenSat1BaseIE):
'upload_date': '20160830', 'upload_date': '20160830',
'uploader': 'PULS_4', 'uploader': 'PULS_4',
}, },
}, {
'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
'only_matching': True,
}, {
'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
'only_matching': True,
}] }]
_TOKEN = 'puls4' _TOKEN = 'puls4'
_SALT = '01!kaNgaiNgah1Ie4AeSha' _SALT = '01!kaNgaiNgah1Ie4AeSha'

View File

@ -5,6 +5,7 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse from ..compat import compat_urllib_parse_urlparse
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
ExtractorError,
int_or_none, int_or_none,
xpath_attr, xpath_attr,
xpath_text, xpath_text,
@ -101,6 +102,11 @@ class RuutuIE(InfoExtractor):
}) })
extract_formats(video_xml.find('./Clip')) extract_formats(video_xml.find('./Clip'))
drm = xpath_text(video_xml, './Clip/DRM', default=None)
if not formats and drm:
raise ExtractorError('This video is DRM protected.', expected=True)
self._sort_formats(formats) self._sort_formats(formats)
return { return {

View File

@ -1,146 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
unified_strdate,
js_to_json,
)
class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)'
EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1'
_TESTS = [{
'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
playerdata = self._download_webpage(
'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
video_id, 'Downloading player webpage')
vidtitle = self._search_regex(
r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
playerconfig = self._download_webpage(
'http://player.screenwavemedia.com/player.js',
video_id, 'Downloading playerconfig webpage')
videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver')
sources = self._parse_json(
js_to_json(
re.sub(
r'(?s)/\*.*?\*/', '',
self._search_regex(
r'sources\s*:\s*(\[[^\]]+?\])', playerconfig,
'sources',
).replace(
"' + thisObj.options.videoserver + '",
videoserver
).replace(
"' + playerVidId + '",
video_id
)
)
),
video_id, fatal=False
)
# Fallback to hardcoded sources if JS changes again
if not sources:
self.report_warning('Falling back to a hardcoded list of streams')
sources = [{
'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id),
'type': 'mp4',
'label': format_label,
} for format_id, format_label in (
('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))]
sources.append({
'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id),
'type': 'hls',
})
formats = []
for source in sources:
file_ = source.get('file')
if not file_:
continue
if source.get('type') == 'hls':
formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4'))
else:
format_id = self._search_regex(
r'_(.+?)\.[^.]+$', file_, 'format id', default=None)
if not self._is_valid_url(file_, video_id, format_id or 'video'):
continue
format_label = source.get('label')
height = int_or_none(self._search_regex(
r'^(\d+)[pP]', format_label, 'height', default=None))
formats.append({
'url': file_,
'format_id': format_id,
'format': format_label,
'ext': source.get('type'),
'height': height,
})
self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
return {
'id': video_id,
'title': vidtitle,
'formats': formats,
}
class TeamFourIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
_TEST = {
'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
'info_dict': {
'id': 'TeamFourStar-5292a02f20bfa',
'ext': 'mp4',
'upload_date': '20130401',
'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
'title': 'A Moment With TFS Episode 4',
},
'params': {
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
playerdata_url = self._search_regex(
r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
webpage, 'player data URL')
video_title = self._html_search_regex(
r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
webpage, 'title')
video_date = unified_strdate(self._html_search_regex(
r'<div class="heroheadingdate">(?P<date>.+?)</div>',
webpage, 'date', fatal=False))
video_description = self._html_search_regex(
r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
webpage, 'description', fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'_type': 'url_transparent',
'display_id': display_id,
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': playerdata_url,
}

View File

@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor):
}, },
] ]
_CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA'
_IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf'
@staticmethod @staticmethod

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .mtv import MTVServicesInfoExtractor from .mtv import MTVServicesInfoExtractor
@ -16,6 +18,15 @@ class SpikeIE(MTVServicesInfoExtractor):
'timestamp': 1388120400, 'timestamp': 1388120400,
'upload_date': '20131227', 'upload_date': '20131227',
}, },
}, {
'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209',
'md5': 'b25c6f16418aefb9ad5a6cae2559321f',
'info_dict': {
'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
'ext': 'mp4',
'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
},
}, { }, {
'url': 'http://www.spike.com/video-clips/lhtu8m/', 'url': 'http://www.spike.com/video-clips/lhtu8m/',
'only_matching': True, 'only_matching': True,
@ -32,3 +43,12 @@ class SpikeIE(MTVServicesInfoExtractor):
_FEED_URL = 'http://www.spike.com/feeds/mrss/' _FEED_URL = 'http://www.spike.com/feeds/mrss/'
_MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s'
_CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)')
def _extract_mgid(self, webpage):
mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None)
if mgid is None:
url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id')
video_type, episode_id = url_parts.split('/', 1)
mgid = 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id)
return mgid

View File

@ -0,0 +1,48 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from .jwplatform import JWPlatformIE
from ..utils import unified_strdate
class TeamFourStarIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)'
_TEST = {
'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/',
'info_dict': {
'id': '0WdZO31W',
'title': 'TFS Abridged Parody Episode 1',
'description': 'md5:d60bc389588ebab2ee7ad432bda953ae',
'ext': 'mp4',
'timestamp': 1394168400,
'upload_date': '20080508',
},
}
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
jwplatform_url = JWPlatformIE._extract_url(webpage)
video_title = self._html_search_regex(
r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>',
webpage, 'title')
video_date = unified_strdate(self._html_search_regex(
r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>',
webpage, 'date', fatal=False))
video_description = self._html_search_regex(
r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>',
webpage, 'description', fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'_type': 'url_transparent',
'display_id': display_id,
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'url': jwplatform_url,
}

View File

@ -7,33 +7,30 @@ from .common import InfoExtractor
class TeleBruxellesIE(InfoExtractor): class TeleBruxellesIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)' _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/', 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/',
'md5': '59439e568c9ee42fb77588b2096b214f', 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9',
'info_dict': { 'info_dict': {
'id': '11942', 'id': '158856',
'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus', 'display_id': 'que-risque-lauteur-dune-fausse-alerte-a-la-bombe',
'ext': 'flv', 'ext': 'mp4',
'title': 'Parlement : Francken et Galant répondent aux interpellations de lopposition', 'title': 'Que risque lauteur dune fausse alerte à la bombe ?',
'description': 're:Les auditions des ministres se poursuivent*' 'description': 'md5:3cf8df235d44ebc5426373050840e466',
},
'params': {
'skip_download': 'requires rtmpdump'
}, },
}, { }, {
'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/', 'url': 'http://bx1.be/sport/futsal-schaerbeek-sincline-5-3-a-thulin/',
'md5': '181d3fbdcf20b909309e5aef5c6c6047', 'md5': 'dfe07ecc9c153ceba8582ac912687675',
'info_dict': { 'info_dict': {
'id': '10091', 'id': '158433',
'display_id': 'basket-brussels-bat-mons-80-74', 'display_id': 'futsal-schaerbeek-sincline-5-3-a-thulin',
'ext': 'flv', 'ext': 'mp4',
'title': 'Basket : le Brussels bat Mons 80-74', 'title': 'Futsal : Schaerbeek sincline 5-3 à Thulin',
'description': 're:^Ils l\u2019on fait ! En basket, le B*', 'description': 'md5:fd013f1488d5e2dceb9cebe39e2d569b',
},
'params': {
'skip_download': 'requires rtmpdump'
}, },
}, {
'url': 'http://bx1.be/emission/bxenf1-gastronomie/',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -50,13 +47,13 @@ class TeleBruxellesIE(InfoExtractor):
r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"', r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"',
webpage, 'RTMP url') webpage, 'RTMP url')
rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url) rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url)
formats = self._extract_wowza_formats(rtmp_url, article_id or display_id)
self._sort_formats(formats)
return { return {
'id': article_id or display_id, 'id': article_id or display_id,
'display_id': display_id, 'display_id': display_id,
'title': title, 'title': title,
'description': description, 'description': description,
'url': rtmp_url, 'formats': formats,
'ext': 'flv',
'rtmp_live': True # if rtmpdump is not called with "--live" argument, the download is blocked and can be completed
} }

View File

@ -5,10 +5,10 @@ from .common import InfoExtractor
class ThisOldHouseIE(InfoExtractor): class ThisOldHouseIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
'md5': '568acf9ca25a639f0c4ff905826b662f', 'md5': '946f05bbaa12a33f9ae35580d2dfcfe3',
'info_dict': { 'info_dict': {
'id': '2REGtUDQ', 'id': '2REGtUDQ',
'ext': 'mp4', 'ext': 'mp4',
@ -20,6 +20,9 @@ class ThisOldHouseIE(InfoExtractor):
}, { }, {
'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -51,7 +51,7 @@ class VevoIE(VevoBaseIE):
'artist': 'Hurts', 'artist': 'Hurts',
'genre': 'Pop', 'genre': 'Pop',
}, },
'expected_warnings': ['Unable to download SMIL file'], 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, { }, {
'note': 'v3 SMIL format', 'note': 'v3 SMIL format',
'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923',
@ -67,7 +67,7 @@ class VevoIE(VevoBaseIE):
'artist': 'Cassadee Pope', 'artist': 'Cassadee Pope',
'genre': 'Country', 'genre': 'Country',
}, },
'expected_warnings': ['Unable to download SMIL file'], 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, { }, {
'note': 'Age-limited video', 'note': 'Age-limited video',
'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282',
@ -83,7 +83,7 @@ class VevoIE(VevoBaseIE):
'artist': 'Justin Timberlake', 'artist': 'Justin Timberlake',
'genre': 'Pop', 'genre': 'Pop',
}, },
'expected_warnings': ['Unable to download SMIL file'], 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, { }, {
'note': 'No video_info', 'note': 'No video_info',
'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000',
@ -91,15 +91,33 @@ class VevoIE(VevoBaseIE):
'info_dict': { 'info_dict': {
'id': 'USUV71503000', 'id': 'USUV71503000',
'ext': 'mp4', 'ext': 'mp4',
'title': 'K Camp - Till I Die', 'title': 'K Camp ft. T.I. - Till I Die',
'age_limit': 18, 'age_limit': 18,
'timestamp': 1449468000, 'timestamp': 1449468000,
'upload_date': '20151207', 'upload_date': '20151207',
'uploader': 'K Camp', 'uploader': 'K Camp',
'track': 'Till I Die', 'track': 'Till I Die',
'artist': 'K Camp', 'artist': 'K Camp',
'genre': 'Rap/Hip-Hop', 'genre': 'Hip-Hop',
}, },
'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, {
'note': 'Featured test',
'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190',
'md5': 'd28675e5e8805035d949dc5cf161071d',
'info_dict': {
'id': 'USUV71402190',
'ext': 'mp4',
'title': 'Lemaitre ft. LoLo - Wait',
'age_limit': 0,
'timestamp': 1413432000,
'upload_date': '20141016',
'uploader': 'Lemaitre',
'track': 'Wait',
'artist': 'Lemaitre',
'genre': 'Electronic',
},
'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'],
}, { }, {
'note': 'Only available via webpage', 'note': 'Only available via webpage',
'url': 'http://www.vevo.com/watch/GBUV71600656', 'url': 'http://www.vevo.com/watch/GBUV71600656',
@ -242,8 +260,11 @@ class VevoIE(VevoBaseIE):
timestamp = parse_iso8601(video_info.get('releaseDate')) timestamp = parse_iso8601(video_info.get('releaseDate'))
artists = video_info.get('artists') artists = video_info.get('artists')
if artists: for curr_artist in artists:
artist = uploader = artists[0]['name'] if curr_artist.get('role') == 'Featured':
featured_artist = curr_artist['name']
else:
artist = uploader = curr_artist['name']
view_count = int_or_none(video_info.get('views', {}).get('total')) view_count = int_or_none(video_info.get('views', {}).get('total'))
for video_version in video_versions: for video_version in video_versions:

View File

@ -1,11 +1,12 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import time
import hmac
import hashlib import hashlib
import hmac
import itertools import itertools
import json
import re
import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -276,10 +277,14 @@ class VikiIE(VikiBaseIE):
height = int_or_none(self._search_regex( height = int_or_none(self._search_regex(
r'^(\d+)[pP]$', format_id, 'height', default=None)) r'^(\d+)[pP]$', format_id, 'height', default=None))
for protocol, format_dict in stream_dict.items(): for protocol, format_dict in stream_dict.items():
# rtmps URLs does not seem to work
if protocol == 'rtmps':
continue
format_url = format_dict['url']
if format_id == 'm3u8': if format_id == 'm3u8':
m3u8_formats = self._extract_m3u8_formats( m3u8_formats = self._extract_m3u8_formats(
format_dict['url'], video_id, 'mp4', format_url, video_id, 'mp4',
entry_protocol='m3u8_native', preference=-1, entry_protocol='m3u8_native',
m3u8_id='m3u8-%s' % protocol, fatal=False) m3u8_id='m3u8-%s' % protocol, fatal=False)
# Despite CODECS metadata in m3u8 all video-only formats # Despite CODECS metadata in m3u8 all video-only formats
# are actually video+audio # are actually video+audio
@ -287,9 +292,23 @@ class VikiIE(VikiBaseIE):
if f.get('acodec') == 'none' and f.get('vcodec') != 'none': if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
f['acodec'] = None f['acodec'] = None
formats.extend(m3u8_formats) formats.extend(m3u8_formats)
elif format_url.startswith('rtmp'):
mobj = re.search(
r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
format_url)
if not mobj:
continue
formats.append({
'format_id': 'rtmp-%s' % format_id,
'ext': 'flv',
'url': mobj.group('url'),
'play_path': mobj.group('playpath'),
'app': mobj.group('app'),
'page_url': url,
})
else: else:
formats.append({ formats.append({
'url': format_dict['url'], 'url': format_url,
'format_id': '%s-%s' % (format_id, protocol), 'format_id': '%s-%s' % (format_id, protocol),
'height': height, 'height': height,
}) })

View File

@ -0,0 +1,102 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
determine_ext,
xpath_text,
)
class WebcasterIE(InfoExtractor):
_VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)'
_TESTS = [{
# http://video.khl.ru/quotes/393859
'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18',
'md5': '0c162f67443f30916ff1c89425dcd4cd',
'info_dict': {
'id': 'c8cefd240aa593681c8d068cff59f407_hd',
'ext': 'mp4',
'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода',
'thumbnail': 're:^https?://.*\.jpg$',
},
}, {
'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_xml(url, video_id)
title = xpath_text(video, './/event_name', 'event name', fatal=True)
def make_id(parts, separator):
return separator.join(filter(None, parts))
formats = []
for format_id in (None, 'noise'):
track_tag = make_id(('track', format_id), '_')
for track in video.findall('.//iphone/%s' % track_tag):
track_url = track.text
if not track_url:
continue
if determine_ext(track_url) == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
track_url, video_id, 'mp4',
entry_protocol='m3u8_native',
m3u8_id=make_id(('hls', format_id), '-'), fatal=False)
for f in m3u8_formats:
f.update({
'source_preference': 0 if format_id == 'noise' else 1,
'format_note': track.get('title'),
})
formats.extend(m3u8_formats)
self._sort_formats(formats)
thumbnail = xpath_text(video, './/image', 'thumbnail')
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}
class WebcasterFeedIE(InfoExtractor):
_VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)'
_TEST = {
'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104',
'only_matching': True,
}
@staticmethod
def _extract_url(ie, webpage):
mobj = re.search(
r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)',
webpage)
if mobj:
return mobj.group('url')
for secure in (True, False):
video_url = ie._og_search_video_url(
webpage, secure=secure, default=None)
if video_url:
mobj = re.search(
r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)',
video_url)
if mobj:
return mobj.group('url')
def _real_extract(self, url):
video_id = self._match_id(url)
feed = self._download_xml(url, video_id)
video_url = xpath_text(
feed, ('video_hd', 'video'), 'video url', fatal=True)
return self.url_result(video_url, WebcasterIE.ie_key())

View File

@ -1796,7 +1796,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
| |
((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}) ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,})
)""" )"""
_TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
IE_NAME = 'youtube:playlist' IE_NAME = 'youtube:playlist'
_TESTS = [{ _TESTS = [{

View File

@ -55,12 +55,12 @@ class Socks5AddressType(object):
ATYP_IPV6 = 0x04 ATYP_IPV6 = 0x04
class ProxyError(IOError): class ProxyError(socket.error):
ERR_SUCCESS = 0x00 ERR_SUCCESS = 0x00
def __init__(self, code=None, msg=None): def __init__(self, code=None, msg=None):
if code is not None and msg is None: if code is not None and msg is None:
msg = self.CODES.get(code) and 'unknown error' msg = self.CODES.get(code) or 'unknown error'
super(ProxyError, self).__init__(code, msg) super(ProxyError, self).__init__(code, msg)
@ -123,7 +123,7 @@ class sockssocket(socket.socket):
while len(data) < cnt: while len(data) < cnt:
cur = self.recv(cnt - len(data)) cur = self.recv(cnt - len(data))
if not cur: if not cur:
raise IOError('{0} bytes missing'.format(cnt - len(data))) raise EOFError('{0} bytes missing'.format(cnt - len(data)))
data += cur data += cur
return data return data

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.11.18' __version__ = '2016.12.09'