Commit ef94573770 (Gilles Habran, 2016-03-25 11:37:11 +01:00)
227 changed files with 3668 additions and 1145 deletions

.gitignore (vendored)

@@ -1,5 +1,6 @@
 *.pyc
 *.pyo
+*.class
 *~
 *.DS_Store
 wine-py2exe/
@@ -32,4 +33,4 @@ test/testdata
 .tox
 youtube-dl.zsh
 .idea
 .idea/*

AUTHORS

@@ -161,3 +161,9 @@ Jens Wille
 Robin Houtevelts
 Patrick Griffis
 Aidan Rowe
+mutantmonkey
+Ben Congdon
+Kacper Michajłow
+José Joaquín Atria
+Viťas Strádal
+Kagami Hiiragi

CONTRIBUTING.md

@@ -85,7 +85,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 * python
-* make
+* make (both GNU make and BSD make are supported)
 * pandoc
 * zip
 * nosetests

Makefile

@@ -3,6 +3,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bas
 clean:
 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
 	find . -name "*.pyc" -delete
+	find . -name "*.class" -delete

 PREFIX ?= /usr/local
 BINDIR ?= $(PREFIX)/bin
@@ -11,15 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python

 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
-ifeq ($(PREFIX),/usr)
-	SYSCONFDIR=/etc
-else
-	ifeq ($(PREFIX),/usr/local)
-		SYSCONFDIR=/etc
-	else
-		SYSCONFDIR=$(PREFIX)/etc
-	endif
-endif
+SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi

 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
 	install -d $(DESTDIR)$(BINDIR)
@@ -44,7 +37,7 @@ test:

 ot: offlinetest

 offlinetest: codetest
-	nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
+	$(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py

 tar: youtube-dl.tar.gz

README.md

@@ -80,6 +80,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      on Windows)
     --flat-playlist                  Do not extract the videos of a playlist,
                                      only list them.
+    --mark-watched                   Mark videos watched (YouTube only)
+    --no-mark-watched                Do not mark videos watched (YouTube only)
     --no-color                       Do not emit color codes in output

 ## Network Options:
@@ -172,6 +174,8 @@ which means you can modify it, redistribute it or use it however you like.
                                      (e.g. 50K or 4.2M)
     -R, --retries RETRIES            Number of retries (default is 10), or
                                      "infinite".
+    --fragment-retries RETRIES       Number of retries for a fragment (default
+                                     is 10), or "infinite" (DASH only)
     --buffer-size SIZE               Size of download buffer (e.g. 1024 or 16K)
                                      (default is 1024)
     --no-resize-buffer               Do not automatically adjust the buffer
@@ -189,7 +193,7 @@ which means you can modify it, redistribute it or use it however you like.
                                      to play it)
     --external-downloader COMMAND    Use the specified external downloader.
                                      Currently supports
-                                     aria2c,axel,curl,httpie,wget
+                                     aria2c,avconv,axel,curl,ffmpeg,httpie,wget
     --external-downloader-args ARGS  Give these arguments to the external
                                      downloader
@@ -384,8 +388,8 @@ which means you can modify it, redistribute it or use it however you like.
     --no-post-overwrites             Do not overwrite post-processed files; the
                                      post-processed files are overwritten by
                                      default
-    --embed-subs                     Embed subtitles in the video (only for mkv
-                                     and mp4 videos)
+    --embed-subs                     Embed subtitles in the video (only for mp4,
+                                     webm and mkv videos)
     --embed-thumbnail                Embed thumbnail in the audio as cover art
     --add-metadata                   Write metadata to the video file
     --metadata-from-title FORMAT     Parse additional metadata like song title /
@@ -839,7 +843,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 * python
-* make
+* make (both GNU make and BSD make are supported)
 * pandoc
 * zip
 * nosetests
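Note: the two new download options map directly onto YoutubeDL params. A minimal sketch, assuming only what this diff shows (the URL is youtube-dl's usual test video):

    # Hedged sketch: programmatic equivalents of the new CLI flags.
    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({
        'fragment_retries': 10,   # --fragment-retries 10 (DASH fragments)
        'retries': float('inf'),  # --retries infinite
    })
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])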

docs/supportedsites.md

@@ -54,6 +54,7 @@
 - **AtresPlayer**
 - **ATTTechChannel**
 - **AudiMedia**
+- **AudioBoom**
 - **audiomack**
 - **audiomack:album**
 - **Azubu**
@@ -73,6 +74,7 @@
 - **Bigflix**
 - **Bild**: Bild.de
 - **BiliBili**
+- **BioBioChileTV**
 - **BleacherReport**
 - **BleacherReportCMS**
 - **blinkx**
@@ -80,6 +82,7 @@
 - **BokeCC**
 - **Bpb**: Bundeszentrale für politische Bildung
 - **BR**: Bayerischer Rundfunk Mediathek
+- **BravoTV**
 - **Break**
 - **brightcove:legacy**
 - **brightcove:new**
@@ -98,6 +101,7 @@
 - **CBSNews**: CBS News
 - **CBSNewsLiveVideo**: CBS News Live Videos
 - **CBSSports**
+- **CDA**
 - **CeskaTelevize**
 - **channel9**: Channel 9
 - **Chaturbate**
@@ -167,6 +171,8 @@
 - **Dump**
 - **Dumpert**
 - **dvtv**: http://video.aktualne.cz/
+- **dw**
+- **dw:article**
 - **EaglePlatform**
 - **EbaumsWorld**
 - **EchoMsk**
@@ -190,10 +196,10 @@
 - **ExpoTV**
 - **ExtremeTube**
 - **facebook**
-- **facebook:post**
 - **faz.net**
 - **fc2**
 - **Fczenit**
+- **features.aol.com**
 - **fernsehkritik.tv**
 - **Firstpost**
 - **FiveTV**
@@ -240,6 +246,7 @@
 - **GPUTechConf**
 - **Groupon**
 - **Hark**
+- **HBO**
 - **HearThisAt**
 - **Heise**
 - **HellPorno**
@@ -293,6 +300,7 @@
 - **kontrtube**: KontrTube.ru - Труба зовёт
 - **KrasView**: Красвью
 - **Ku6**
+- **KUSI**
 - **kuwo:album**: 酷我音乐 - 专辑
 - **kuwo:category**: 酷我音乐 - 分类
 - **kuwo:chart**: 酷我音乐 - 排行榜
@@ -301,12 +309,11 @@
 - **kuwo:song**: 酷我音乐
 - **la7.tv**
 - **Laola1Tv**
+- **Le**: 乐视网
 - **Lecture2Go**
 - **Lemonde**
-- **Letv**: 乐视网
+- **LePlaylist**
 - **LetvCloud**: 乐视云
-- **LetvPlaylist**
-- **LetvTv**
 - **Libsyn**
 - **life:embed**
 - **lifenews**: LIFE | NEWS
@@ -324,6 +331,7 @@
 - **m6**
 - **macgamestore**: MacGameStore trailers
 - **mailru**: Видео@Mail.Ru
+- **MakersChannel**
 - **MakerTV**
 - **Malemotion**
 - **MatchTV**
@@ -334,10 +342,12 @@
 - **Mgoon**
 - **Minhateca**
 - **MinistryGrid**
+- **Minoto**
 - **miomio.tv**
 - **MiTele**: mitele.es
 - **mixcloud**
 - **MLB**
+- **Mnet**
 - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
 - **Mofosex**
 - **Mojvideo**
@@ -421,6 +431,7 @@
 - **Npr**
 - **NRK**
 - **NRKPlaylist**
+- **NRKSkole**: NRK Skole
 - **NRKTV**: NRK TV and NRK Radio
 - **ntv.ru**
 - **Nuvid**
@@ -433,6 +444,7 @@
 - **OnionStudios**
 - **Ooyala**
 - **OoyalaExternal**
+- **Openload**
 - **OraTV**
 - **orf:fm4**: radio FM4
 - **orf:iptv**: iptv.ORF.at
@@ -493,6 +505,7 @@
 - **Restudy**
 - **ReverbNation**
 - **Revision3**
+- **RICE**
 - **RingTV**
 - **RottenTomatoes**
 - **Roxwel**
@@ -517,6 +530,7 @@
 - **RUTV**: RUTV.RU
 - **Ruutu**
 - **safari**: safaribooksonline.com online video
+- **safari:api**
 - **safari:course**: safaribooksonline.com online courses
 - **Sandia**: Sandia National Laboratories
 - **Sapo**: SAPO Vídeos
@@ -610,7 +624,9 @@
 - **TheOnion**
 - **ThePlatform**
 - **ThePlatformFeed**
+- **TheScene**
 - **TheSixtyOne**
+- **TheStar**
 - **ThisAmericanLife**
 - **ThisAV**
 - **THVideo**
@@ -644,6 +660,7 @@
 - **tv.dfb.de**
 - **TV2**
 - **TV2Article**
+- **TV3**
 - **TV4**: tv4.se and tv4play.se
 - **TVC**
 - **TVCArticle**
@@ -669,6 +686,7 @@
 - **UDNEmbed**: 聯合影音
 - **Unistra**
 - **Urort**: NRK P3 Urørt
+- **USAToday**
 - **ustream**
 - **ustream:channel**
 - **Ustudio**
@@ -682,6 +700,7 @@
 - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
 - **vh1.com**
 - **Vice**
+- **ViceShow**
 - **Viddler**
 - **video.google:search**: Google Video search
 - **video.mit.edu**
@@ -709,6 +728,7 @@
 - **vimeo:channel**
 - **vimeo:group**
 - **vimeo:likes**: Vimeo user likes
+- **vimeo:ondemand**
 - **vimeo:review**: Review pages on vimeo
 - **vimeo:user**
 - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
@@ -773,6 +793,7 @@
 - **youtube:channel**: YouTube.com channels
 - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication)
 - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
+- **youtube:live**: YouTube.com live streams
 - **youtube:playlist**: YouTube.com playlists
 - **youtube:playlists**: YouTube.com user/channel playlists
 - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)

test/helper.py

@@ -11,8 +11,11 @@ import sys

 import youtube_dl.extractor
 from youtube_dl import YoutubeDL
-from youtube_dl.utils import (
+from youtube_dl.compat import (
+    compat_os_name,
     compat_str,
+)
+from youtube_dl.utils import (
     preferredencoding,
     write_string,
 )
@@ -42,7 +45,7 @@ def report_warning(message):
     Print the message to stderr, it will be prefixed with 'WARNING:'
     If stderr is a tty file the 'WARNING:' will be colored
     '''
-    if sys.stderr.isatty() and os.name != 'nt':
+    if sys.stderr.isatty() and compat_os_name != 'nt':
         _msg_header = '\033[0;33mWARNING:\033[0m'
     else:
         _msg_header = 'WARNING:'

test/test_YoutubeDL.py

@@ -222,6 +222,11 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'dash-video-low')

+        ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
         formats = [
             {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
         ]
@@ -502,6 +507,9 @@ class TestYoutubeDL(unittest.TestCase):
         assertRegexpMatches(self, ydl._format_note({
             'vbr': 10,
         }), '^\s*10k$')
+        assertRegexpMatches(self, ydl._format_note({
+            'fps': 30,
+        }), '^30fps$')

     def test_postprocessors(self):
         filename = 'post-processor-testfile.mp4'
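The first new assertion exercises the string operators that this commit extends to format_id (see the str_operator_rex change in youtube_dl/YoutubeDL.py further down): ^= is prefix match, $= suffix match, *= substring. A minimal sketch of the same selection outside the test harness:

    # Hedged sketch: format_id is now usable with string-matching filters.
    from youtube_dl import YoutubeDL

    # '^=' prefix, '$=' suffix, '*=' substring:
    ydl = YoutubeDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})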

test/test_http.py

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 from __future__ import unicode_literals

 # Allow direct execution
@@ -52,7 +53,12 @@ class TestHTTP(unittest.TestCase):
             ('localhost', 0), HTTPTestRequestHandler)
         self.httpd.socket = ssl.wrap_socket(
             self.httpd.socket, certfile=certfn, server_side=True)
-        self.port = self.httpd.socket.getsockname()[1]
+        if os.name == 'java':
+            # In Jython SSLSocket is not a subclass of socket.socket
+            sock = self.httpd.socket.sock
+        else:
+            sock = self.httpd.socket
+        self.port = sock.getsockname()[1]
         self.server_thread = threading.Thread(target=self.httpd.serve_forever)
         self.server_thread.daemon = True
         self.server_thread.start()
@@ -115,5 +121,14 @@ class TestProxy(unittest.TestCase):
         response = ydl.urlopen(req).read().decode('utf-8')
         self.assertEqual(response, 'cn: {0}'.format(url))

+    def test_proxy_with_idn(self):
+        ydl = YoutubeDL({
+            'proxy': 'localhost:{0}'.format(self.port),
+        })
+        url = 'http://中文.tw/'
+        response = ydl.urlopen(url).read().decode('utf-8')
+        # b'xn--fiq228c' is '中文'.encode('idna')
+        self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
 if __name__ == '__main__':
     unittest.main()
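The expected value in test_proxy_with_idn comes straight from Python's idna codec, which you can confirm in a REPL:

    # u'中文'.encode('idna') yields the punycode form the proxy should see.
    print(u'\u4e2d\u6587'.encode('idna'))  # b'xn--fiq228c'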

test/test_utils.py

@@ -28,6 +28,7 @@ from youtube_dl.utils import (
     encodeFilename,
     escape_rfc3986,
     escape_url,
+    extract_attributes,
     ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
@@ -41,6 +42,7 @@ from youtube_dl.utils import (
     orderedSet,
     parse_duration,
     parse_filesize,
+    parse_count,
     parse_iso8601,
     read_batch_urls,
     sanitize_filename,
@@ -61,6 +63,7 @@ from youtube_dl.utils import (
     lowercase_escape,
     url_basename,
     urlencode_postdata,
+    update_url_query,
     version_tuple,
     xpath_with_ns,
     xpath_element,
@@ -75,7 +78,10 @@ from youtube_dl.utils import (
     cli_bool_option,
 )
 from youtube_dl.compat import (
+    compat_chr,
     compat_etree_fromstring,
+    compat_urlparse,
+    compat_parse_qs,
 )
@@ -454,6 +460,40 @@ class TestUtil(unittest.TestCase):
         data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
         self.assertTrue(isinstance(data, bytes))

+    def test_update_url_query(self):
+        def query_dict(url):
+            return compat_parse_qs(compat_urlparse.urlparse(url).query)
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
+            query_dict('http://example.com/path?quality=HD&format=mp4'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
+            query_dict('http://example.com/path?system=LINUX&system=WINDOWS'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': 'id,formats,subtitles'})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path?manifest=f4m', {'manifest': []})),
+            query_dict('http://example.com/path'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
+            query_dict('http://example.com/path?system=LINUX'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'fields': b'id,formats,subtitles'})),
+            query_dict('http://example.com/path?fields=id,formats,subtitles'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'width': 1080, 'height': 720})),
+            query_dict('http://example.com/path?width=1080&height=720'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'bitrate': 5020.43})),
+            query_dict('http://example.com/path?bitrate=5020.43'))
+        self.assertEqual(query_dict(update_url_query(
+            'http://example.com/path', {'test': '第二行тест'})),
+            query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
+
     def test_dict_get(self):
         FALSE_VALUES = {
             'none': None,
@@ -537,11 +577,11 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(
             escape_url('http://тест.рф/фрагмент'),
-            'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
+            'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
         )
         self.assertEqual(
             escape_url('http://тест.рф/абв?абв=абв#абв'),
-            'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
+            'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
@@ -591,6 +631,44 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": "def",}')
         self.assertEqual(json.loads(on), {'abc': 'def'})

+    def test_extract_attributes(self):
+        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
+        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
+        self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
+        self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'})  # XML
+        self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
+        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
+        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
+        self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
+        self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
+        self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
+        self.assertEqual(extract_attributes('<e x >'), {'x': None})
+        self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
+        self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
+        self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
+        self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
+        self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased
+        self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
+        self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
+        self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
+        self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
+        self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
+        # "Narrow" Python builds don't support unicode code points outside BMP.
+        try:
+            compat_chr(0x10000)
+            supports_outside_bmp = True
+        except ValueError:
+            supports_outside_bmp = False
+        if supports_outside_bmp:
+            self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
@@ -616,6 +694,17 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_filesize('1.2Tb'), 1200000000000)
         self.assertEqual(parse_filesize('1,24 KB'), 1240)

+    def test_parse_count(self):
+        self.assertEqual(parse_count(None), None)
+        self.assertEqual(parse_count(''), None)
+        self.assertEqual(parse_count('0'), 0)
+        self.assertEqual(parse_count('1000'), 1000)
+        self.assertEqual(parse_count('1.000'), 1000)
+        self.assertEqual(parse_count('1.1k'), 1100)
+        self.assertEqual(parse_count('1.1kk'), 1100000)
+        self.assertEqual(parse_count('1.1kk '), 1100000)
+        self.assertEqual(parse_count('1.1kk views'), 1100000)
+
     def test_version_tuple(self):
         self.assertEqual(version_tuple('1'), (1,))
         self.assertEqual(version_tuple('10.23.344'), (10, 23, 344))
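The new test_update_url_query cases pin down the semantics: values may be strings, bytes, numbers, or sequences; a key mapped to an empty list is dropped; existing keys are replaced. A rough sketch of that behavior (not the actual youtube_dl.utils implementation):

    try:  # Python 3
        from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
    except ImportError:  # Python 2
        from urlparse import urlparse, urlunparse, parse_qs
        from urllib import urlencode

    def update_url_query(url, query):
        parsed = urlparse(url)
        qs = parse_qs(parsed.query)
        for k, v in query.items():
            qs[k] = list(v) if isinstance(v, (list, tuple)) else [v]
        # urlencode(..., doseq=True) serializes sequences and drops empty ones
        return urlunparse(parsed._replace(query=urlencode(qs, doseq=True)))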

tox.ini

@@ -8,6 +8,6 @@ deps =
 passenv = HOME
 defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
     --exclude test_subtitles.py --exclude test_write_annotations.py
-    --exclude test_youtube_lists.py
+    --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
 commands = nosetests --verbose {posargs:{[testenv]defaultargs}}  # --with-coverage --cover-package=youtube_dl --cover-html
 #     test.test_download:TestDownload.test_NowVideo

youtube_dl/YoutubeDL.py

@@ -24,9 +24,6 @@ import time
 import tokenize
 import traceback

-if os.name == 'nt':
-    import ctypes
-
 from .compat import (
     compat_basestring,
     compat_cookiejar,
@@ -34,6 +31,7 @@ from .compat import (
     compat_get_terminal_size,
     compat_http_client,
     compat_kwargs,
+    compat_os_name,
     compat_str,
     compat_tokenize_tokenize,
     compat_urllib_error,
@@ -87,6 +85,7 @@ from .extractor import get_info_extractor, gen_extractors
 from .downloader import get_suitable_downloader
 from .downloader.rtmp import rtmpdump_version
 from .postprocessor import (
+    FFmpegFixupM3u8PP,
     FFmpegFixupM4aPP,
     FFmpegFixupStretchedPP,
     FFmpegMergerPP,
@@ -95,6 +94,9 @@ from .postprocessor import (
 )
 from .version import __version__

+if compat_os_name == 'nt':
+    import ctypes
+

 class YoutubeDL(object):
     """YoutubeDL class.
@@ -450,7 +452,7 @@ class YoutubeDL(object):
     def to_console_title(self, message):
         if not self.params.get('consoletitle', False):
             return
-        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
+        if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
             # c_wchar_p() might not be necessary if `message` is
             # already of type unicode()
             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
@@ -521,7 +523,7 @@ class YoutubeDL(object):
         else:
             if self.params.get('no_warnings'):
                 return
-            if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                 _msg_header = '\033[0;33mWARNING:\033[0m'
             else:
                 _msg_header = 'WARNING:'
@@ -533,7 +535,7 @@ class YoutubeDL(object):
         Do the same as trouble, but prefixes the message with 'ERROR:', colored
         in red if stderr is a tty file.
         '''
-        if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt':
+        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
             _msg_header = '\033[0;31mERROR:\033[0m'
         else:
             _msg_header = 'ERROR:'
@@ -566,7 +568,7 @@ class YoutubeDL(object):
             elif template_dict.get('height'):
                 template_dict['resolution'] = '%sp' % template_dict['height']
             elif template_dict.get('width'):
-                template_dict['resolution'] = '?x%d' % template_dict['width']
+                template_dict['resolution'] = '%dx?' % template_dict['width']

             sanitize = lambda k, v: sanitize_filename(
                 compat_str(v),
@@ -903,7 +905,7 @@ class YoutubeDL(object):
             '*=': lambda attr, value: value in attr,
         }
         str_operator_rex = re.compile(r'''(?x)
-            \s*(?P<key>ext|acodec|vcodec|container|protocol)
+            \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
@@ -1232,6 +1234,10 @@ class YoutubeDL(object):
                 if t.get('id') is None:
                     t['id'] = '%d' % i

+            if self.params.get('list_thumbnails'):
+                self.list_thumbnails(info_dict)
+                return
+
         if thumbnails and 'thumbnail' not in info_dict:
             info_dict['thumbnail'] = thumbnails[-1]['url']
@@ -1333,9 +1339,6 @@ class YoutubeDL(object):
         if self.params.get('listformats'):
             self.list_formats(info_dict)
             return
-        if self.params.get('list_thumbnails'):
-            self.list_thumbnails(info_dict)
-            return

         req_format = self.params.get('format')
         if req_format is None:
@@ -1637,6 +1640,8 @@ class YoutubeDL(object):
         if fixup_policy is None:
             fixup_policy = 'detect_or_warn'

+        INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
         stretched_ratio = info_dict.get('stretched_ratio')
         if stretched_ratio is not None and stretched_ratio != 1:
             if fixup_policy == 'warn':
@@ -1649,15 +1654,18 @@ class YoutubeDL(object):
                     info_dict['__postprocessors'].append(stretched_pp)
                 else:
                     self.report_warning(
-                        '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
-                            info_dict['id'], stretched_ratio))
+                        '%s: Non-uniform pixel ratio (%s). %s'
+                        % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
             else:
                 assert fixup_policy in ('ignore', 'never')

-        if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
+        if (info_dict.get('requested_formats') is None and
+                info_dict.get('container') == 'm4a_dash'):
             if fixup_policy == 'warn':
-                self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
-                    info_dict['id']))
+                self.report_warning(
+                    '%s: writing DASH m4a. '
+                    'Only some players support this container.'
+                    % info_dict['id'])
             elif fixup_policy == 'detect_or_warn':
                 fixup_pp = FFmpegFixupM4aPP(self)
                 if fixup_pp.available:
@@ -1665,8 +1673,27 @@ class YoutubeDL(object):
                     info_dict['__postprocessors'].append(fixup_pp)
                 else:
                     self.report_warning(
-                        '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
-                            info_dict['id']))
+                        '%s: writing DASH m4a. '
+                        'Only some players support this container. %s'
+                        % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+            else:
+                assert fixup_policy in ('ignore', 'never')
+
+        if (info_dict.get('protocol') == 'm3u8_native' or
+                info_dict.get('protocol') == 'm3u8' and
+                self.params.get('hls_prefer_native')):
+            if fixup_policy == 'warn':
+                self.report_warning('%s: malformated aac bitstream.' % (
+                    info_dict['id']))
+            elif fixup_policy == 'detect_or_warn':
+                fixup_pp = FFmpegFixupM3u8PP(self)
+                if fixup_pp.available:
+                    info_dict.setdefault('__postprocessors', [])
+                    info_dict['__postprocessors'].append(fixup_pp)
+                else:
+                    self.report_warning(
+                        '%s: malformated aac bitstream. %s'
+                        % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
         else:
             assert fixup_policy in ('ignore', 'never')
@@ -1809,7 +1836,7 @@ class YoutubeDL(object):
         if fdict.get('language'):
             if res:
                 res += ' '
-            res += '[%s]' % fdict['language']
+            res += '[%s] ' % fdict['language']
         if fdict.get('format_note') is not None:
             res += fdict['format_note'] + ' '
         if fdict.get('tbr') is not None:
@@ -1830,7 +1857,9 @@ class YoutubeDL(object):
             if fdict.get('vbr') is not None:
                 res += '%4dk' % fdict['vbr']
         if fdict.get('fps') is not None:
-            res += ', %sfps' % fdict['fps']
+            if res:
+                res += ', '
+            res += '%sfps' % fdict['fps']
         if fdict.get('acodec') is not None:
             if res:
                 res += ', '
@@ -1873,13 +1902,8 @@ class YoutubeDL(object):
     def list_thumbnails(self, info_dict):
         thumbnails = info_dict.get('thumbnails')
         if not thumbnails:
-            tn_url = info_dict.get('thumbnail')
-            if tn_url:
-                thumbnails = [{'id': '0', 'url': tn_url}]
-            else:
-                self.to_screen(
-                    '[info] No thumbnails present for %s' % info_dict['id'])
-                return
+            self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
+            return

         self.to_screen(
             '[info] Thumbnails for %s:' % info_dict['id'])
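Two behavioral notes on the hunks above: --list-thumbnails now short-circuits during thumbnail processing, before format selection, so it no longer fails on extractors that yield no usable formats; and the resolution template fix makes a width-only format render as '1920x?' instead of '?x1920'. A hedged usage sketch (the URL is youtube-dl's usual test video):

    from youtube_dl import YoutubeDL

    # Prints the thumbnail table and returns without selecting formats.
    YoutubeDL({'list_thumbnails': True}).download(
        ['https://www.youtube.com/watch?v=BaW_jenozKc'])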

youtube_dl/__init__.py

@@ -144,14 +144,20 @@ def _real_main(argv=None):
         if numeric_limit is None:
             parser.error('invalid max_filesize specified')
         opts.max_filesize = numeric_limit
-    if opts.retries is not None:
-        if opts.retries in ('inf', 'infinite'):
-            opts_retries = float('inf')
+
+    def parse_retries(retries):
+        if retries in ('inf', 'infinite'):
+            parsed_retries = float('inf')
         else:
             try:
-                opts_retries = int(opts.retries)
+                parsed_retries = int(retries)
             except (TypeError, ValueError):
                 parser.error('invalid retry count specified')
+        return parsed_retries
+    if opts.retries is not None:
+        opts.retries = parse_retries(opts.retries)
+    if opts.fragment_retries is not None:
+        opts.fragment_retries = parse_retries(opts.fragment_retries)
     if opts.buffersize is not None:
         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
         if numeric_buffersize is None:
@@ -299,7 +305,8 @@ def _real_main(argv=None):
         'force_generic_extractor': opts.force_generic_extractor,
         'ratelimit': opts.ratelimit,
         'nooverwrites': opts.nooverwrites,
-        'retries': opts_retries,
+        'retries': opts.retries,
+        'fragment_retries': opts.fragment_retries,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'continuedl': opts.continue_dl,

youtube_dl/compat.py

@@ -77,6 +77,11 @@ try:
 except ImportError:  # Python 2
     from urllib import urlretrieve as compat_urlretrieve

+try:
+    from html.parser import HTMLParser as compat_HTMLParser
+except ImportError:  # Python 2
+    from HTMLParser import HTMLParser as compat_HTMLParser
+
 try:
     from subprocess import DEVNULL
@@ -251,6 +256,16 @@ else:
             el.text = el.text.decode('utf-8')
         return doc

+if sys.version_info < (2, 7):
+    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
+    # .//node does not match if a node is a direct child of . !
+    def compat_xpath(xpath):
+        if isinstance(xpath, compat_str):
+            xpath = xpath.encode('ascii')
+        return xpath
+else:
+    compat_xpath = lambda xpath: xpath
+
 try:
     from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
@@ -326,6 +341,9 @@ def compat_ord(c):
         return ord(c)

+compat_os_name = os._name if os.name == 'java' else os.name
+
+
 if sys.version_info >= (3, 0):
     compat_getenv = os.getenv
     compat_expanduser = os.path.expanduser
@@ -346,7 +364,7 @@ else:
     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
     # for different platforms with correct environment variables decoding.

-    if os.name == 'posix':
+    if compat_os_name == 'posix':
         def compat_expanduser(path):
             """Expand ~ and ~user constructions.  If user or $HOME is unknown,
             do nothing."""
@@ -370,7 +388,7 @@ else:
                 userhome = pwent.pw_dir
             userhome = userhome.rstrip('/')
             return (userhome + path[i:]) or '/'
-    elif os.name == 'nt' or os.name == 'ce':
+    elif compat_os_name == 'nt' or compat_os_name == 'ce':
         def compat_expanduser(path):
             """Expand ~ and ~user constructs.
@@ -540,6 +558,7 @@ else:
     from tokenize import generate_tokens as compat_tokenize_tokenize

 __all__ = [
+    'compat_HTMLParser',
     'compat_HTTPError',
     'compat_basestring',
     'compat_chr',
@@ -556,6 +575,7 @@ __all__ = [
     'compat_itertools_count',
     'compat_kwargs',
     'compat_ord',
+    'compat_os_name',
     'compat_parse_qs',
     'compat_print',
     'compat_shlex_split',
@@ -575,6 +595,7 @@ __all__ = [
     'compat_urlparse',
     'compat_urlretrieve',
     'compat_xml_parse_error',
+    'compat_xpath',
     'shlex_quote',
     'subprocess_check_output',
     'workaround_optparse_bug9161',
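The two new shims target Jython and Python 2.6: on Jython, os.name is 'java' and the real platform name lives in os._name; on 2.6, unicode XPaths fail to match direct children. A quick check of the former (hedged; requires this commit's compat module):

    from youtube_dl.compat import compat_os_name

    # 'nt' or 'posix' even under Jython, where os.name == 'java'.
    print(compat_os_name)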

youtube_dl/downloader/__init__.py

@@ -1,14 +1,16 @@
 from __future__ import unicode_literals

 from .common import FileDownloader
-from .external import get_external_downloader
 from .f4m import F4mFD
 from .hls import HlsFD
-from .hls import NativeHlsFD
 from .http import HttpFD
-from .rtsp import RtspFD
 from .rtmp import RtmpFD
 from .dash import DashSegmentsFD
+from .rtsp import RtspFD
+from .external import (
+    get_external_downloader,
+    FFmpegFD,
+)

 from ..utils import (
     determine_protocol,
@@ -16,8 +18,8 @@ from ..utils import (

 PROTOCOL_MAP = {
     'rtmp': RtmpFD,
-    'm3u8_native': NativeHlsFD,
-    'm3u8': HlsFD,
+    'm3u8_native': HlsFD,
+    'm3u8': FFmpegFD,
     'mms': RtspFD,
     'rtsp': RtspFD,
     'f4m': F4mFD,
@@ -30,14 +32,17 @@ def get_suitable_downloader(info_dict, params={}):
     protocol = determine_protocol(info_dict)
     info_dict['protocol'] = protocol

+    # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
+    #     return FFmpegFD
+
     external_downloader = params.get('external_downloader')
     if external_downloader is not None:
         ed = get_external_downloader(external_downloader)
-        if ed.supports(info_dict):
+        if ed.can_download(info_dict):
             return ed

     if protocol == 'm3u8' and params.get('hls_prefer_native'):
-        return NativeHlsFD
+        return HlsFD

     return PROTOCOL_MAP.get(protocol, HttpFD)
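Net effect of the remap: the ffmpeg-driven downloader moves to external.py as FFmpegFD and becomes the default for m3u8, while the former NativeHlsFD is renamed to plain HlsFD and keeps serving m3u8_native and the hls_prefer_native switch. A hedged sketch of the routing:

    from youtube_dl.downloader import get_suitable_downloader

    # The protocol is taken from the info dict when present; 'm3u8' now routes
    # to FFmpegFD, 'm3u8_native' (or hls_prefer_native) to the native HlsFD.
    fd = get_suitable_downloader({'url': 'http://example.com/master.m3u8',
                                  'protocol': 'm3u8'})
    print(fd.__name__)  # FFmpegFD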

youtube_dl/downloader/common.py

@@ -5,6 +5,7 @@ import re
 import sys
 import time

+from ..compat import compat_os_name
 from ..utils import (
     encodeFilename,
     error_to_compat_str,
@@ -114,6 +115,10 @@ class FileDownloader(object):
             return '%10s' % '---b/s'
         return '%10s' % ('%s/s' % format_bytes(speed))

+    @staticmethod
+    def format_retries(retries):
+        return 'inf' if retries == float('inf') else '%.0f' % retries
+
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
@@ -219,7 +224,7 @@ class FileDownloader(object):
         if self.params.get('progress_with_newline', False):
             self.to_screen(fullmsg)
         else:
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                 prev_len = getattr(self, '_report_progress_prev_line_length',
                                    0)
                 if prev_len > len(fullmsg):
@@ -296,7 +301,9 @@ class FileDownloader(object):

     def report_retry(self, count, retries):
         """Report retry in case of HTTP error 5xx"""
-        self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' % (count, retries))
+        self.to_screen(
+            '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
+            % (count, self.format_retries(retries)))

     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""

youtube_dl/downloader/dash.py

@@ -4,6 +4,7 @@ import os
 import re

 from .fragment import FragmentFD
+from ..compat import compat_urllib_error
 from ..utils import (
     sanitize_open,
     encodeFilename,
@@ -36,20 +37,41 @@ class DashSegmentsFD(FragmentFD):

         segments_filenames = []

-        def append_url_to_file(target_url, target_filename):
-            success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
-            if not success:
+        fragment_retries = self.params.get('fragment_retries', 0)
+
+        def append_url_to_file(target_url, tmp_filename, segment_name):
+            target_filename = '%s-%s' % (tmp_filename, segment_name)
+            count = 0
+            while count <= fragment_retries:
+                try:
+                    success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
+                    if not success:
+                        return False
+                    down, target_sanitized = sanitize_open(target_filename, 'rb')
+                    ctx['dest_stream'].write(down.read())
+                    down.close()
+                    segments_filenames.append(target_sanitized)
+                    break
+                except (compat_urllib_error.HTTPError, ) as err:
+                    # YouTube may often return 404 HTTP error for a fragment causing the
+                    # whole download to fail. However if the same fragment is immediately
+                    # retried with the same request data this usually succeeds (1-2 attemps
+                    # is usually enough) thus allowing to download the whole file successfully.
+                    # So, we will retry all fragments that fail with 404 HTTP error for now.
+                    if err.code != 404:
+                        raise
+                    # Retry fragment
+                    count += 1
+                    if count <= fragment_retries:
+                        self.report_retry_fragment(segment_name, count, fragment_retries)
+            if count > fragment_retries:
+                self.report_error('giving up after %s fragment retries' % fragment_retries)
                 return False
-            down, target_sanitized = sanitize_open(target_filename, 'rb')
-            ctx['dest_stream'].write(down.read())
-            down.close()
-            segments_filenames.append(target_sanitized)

         if initialization_url:
-            append_url_to_file(initialization_url, ctx['tmpfilename'] + '-Init')
+            append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
         for i, segment_url in enumerate(segment_urls):
-            segment_filename = '%s-Seg%d' % (ctx['tmpfilename'], i)
-            append_url_to_file(segment_url, segment_filename)
+            append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)

         self._finish_frag_download(ctx)
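The retry loop above is the heart of the new --fragment-retries behavior: only HTTP 404 is retried (YouTube intermittently 404s individual DASH fragments, and an immediate identical retry usually succeeds); every other error propagates. The pattern in isolation (hedged sketch; download_fragment stands in for ctx['dl'].download):

    import urllib.error  # what compat_urllib_error resolves to on Python 3

    def fetch_with_retries(download_fragment, fragment_retries=10):
        count = 0
        while count <= fragment_retries:
            try:
                return download_fragment()
            except urllib.error.HTTPError as err:
                if err.code != 404:
                    raise  # only 404s are worth retrying here
                count += 1
        return False  # give up after fragment_retries attempts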

youtube_dl/downloader/external.py

@@ -2,8 +2,11 @@ from __future__ import unicode_literals

 import os.path
 import subprocess
+import sys
+import re

 from .common import FileDownloader
+from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
 from ..utils import (
     cli_option,
     cli_valueless_option,
@@ -11,6 +14,8 @@ from ..utils import (
     cli_configuration_args,
     encodeFilename,
     encodeArgument,
+    handle_youtubedl_headers,
+    check_executable,
 )
@@ -45,10 +50,18 @@ class ExternalFD(FileDownloader):
     def exe(self):
         return self.params.get('external_downloader')

+    @classmethod
+    def available(cls):
+        return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
+
     @classmethod
     def supports(cls, info_dict):
         return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')

+    @classmethod
+    def can_download(cls, info_dict):
+        return cls.available() and cls.supports(info_dict)
+
     def _option(self, command_option, param):
         return cli_option(self.params, command_option, param)
@@ -76,6 +89,8 @@ class ExternalFD(FileDownloader):

 class CurlFD(ExternalFD):
+    AVAILABLE_OPT = '-V'
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '--location', '-o', tmpfilename]
         for key, val in info_dict['http_headers'].items():
@@ -89,6 +104,8 @@ class CurlFD(ExternalFD):

 class AxelFD(ExternalFD):
+    AVAILABLE_OPT = '-V'
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-o', tmpfilename]
         for key, val in info_dict['http_headers'].items():
@@ -99,6 +116,8 @@ class AxelFD(ExternalFD):

 class WgetFD(ExternalFD):
+    AVAILABLE_OPT = '--version'
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
         for key, val in info_dict['http_headers'].items():
@@ -112,6 +131,8 @@ class WgetFD(ExternalFD):

 class Aria2cFD(ExternalFD):
+    AVAILABLE_OPT = '-v'
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-c']
         cmd += self._configuration_args([
@@ -130,12 +151,112 @@ class Aria2cFD(ExternalFD):

 class HttpieFD(ExternalFD):
+    @classmethod
+    def available(cls):
+        return check_executable('http', ['--version'])
+
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
         for key, val in info_dict['http_headers'].items():
             cmd += ['%s:%s' % (key, val)]
         return cmd

+
+class FFmpegFD(ExternalFD):
+    @classmethod
+    def supports(cls, info_dict):
+        return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+
+    @classmethod
+    def available(cls):
+        return FFmpegPostProcessor().available
+
+    def _call_downloader(self, tmpfilename, info_dict):
+        url = info_dict['url']
+        ffpp = FFmpegPostProcessor(downloader=self)
+        if not ffpp.available:
+            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
+            return False
+        ffpp.check_version()
+
+        args = [ffpp.executable, '-y']
+
+        args += self._configuration_args()
+
+        # start_time = info_dict.get('start_time') or 0
+        # if start_time:
+        #     args += ['-ss', compat_str(start_time)]
+        # end_time = info_dict.get('end_time')
+        # if end_time:
+        #     args += ['-t', compat_str(end_time - start_time)]
+
+        if info_dict['http_headers'] and re.match(r'^https?://', url):
+            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
+            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+            headers = handle_youtubedl_headers(info_dict['http_headers'])
+            args += [
+                '-headers',
+                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+
+        protocol = info_dict.get('protocol')
+
+        if protocol == 'rtmp':
+            player_url = info_dict.get('player_url')
+            page_url = info_dict.get('page_url')
+            app = info_dict.get('app')
+            play_path = info_dict.get('play_path')
+            tc_url = info_dict.get('tc_url')
+            flash_version = info_dict.get('flash_version')
+            live = info_dict.get('rtmp_live', False)
+            if player_url is not None:
+                args += ['-rtmp_swfverify', player_url]
+            if page_url is not None:
+                args += ['-rtmp_pageurl', page_url]
+            if app is not None:
+                args += ['-rtmp_app', app]
+            if play_path is not None:
+                args += ['-rtmp_playpath', play_path]
+            if tc_url is not None:
+                args += ['-rtmp_tcurl', tc_url]
+            if flash_version is not None:
+                args += ['-rtmp_flashver', flash_version]
+            if live:
+                args += ['-rtmp_live', 'live']
+
+        args += ['-i', url, '-c', 'copy']
+        if protocol == 'm3u8':
+            if self.params.get('hls_use_mpegts', False):
+                args += ['-f', 'mpegts']
+            else:
+                args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+        elif protocol == 'rtmp':
+            args += ['-f', 'flv']
+        else:
+            args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
+
+        args = [encodeArgument(opt) for opt in args]
+        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+
+        self._debug_cmd(args)
+
+        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
+        try:
+            retval = proc.wait()
+        except KeyboardInterrupt:
+            # subprocces.run would send the SIGKILL signal to ffmpeg and the
+            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+            # produces a file that is playable (this is mostly useful for live
+            # streams). Note that Windows is not affected and produces playable
+            # files (see https://github.com/rg3/youtube-dl/issues/8300).
+            if sys.platform != 'win32':
+                proc.communicate(b'q')
+            raise
+        return retval
+
+
+class AVconvFD(FFmpegFD):
+    pass
+
 _BY_NAME = dict(
     (klass.get_basename(), klass)
     for name, klass in globals().items()
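Because AVconvFD subclasses FFmpegFD and _BY_NAME is keyed by class basename, both spellings become valid --external-downloader values, which is why the README's list above gains both avconv and ffmpeg. A hedged usage sketch:

    from youtube_dl import YoutubeDL

    # Forces the new FFmpegFD for everything it supports
    # (HTTP(S), FTP(S), m3u8, rtsp, rtmp, mms).
    ydl = YoutubeDL({'external_downloader': 'ffmpeg'})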

youtube_dl/downloader/fragment.py

@@ -19,8 +19,17 @@ class HttpQuietDownloader(HttpFD):
 class FragmentFD(FileDownloader):
     """
     A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
+
+    Available options:
+
+    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH only)
     """

+    def report_retry_fragment(self, fragment_name, count, retries):
+        self.to_screen(
+            '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
+            % (fragment_name, count, self.format_retries(retries)))
+
     def _prepare_and_start_frag_download(self, ctx):
         self._prepare_frag_download(ctx)
         self._start_frag_download(ctx)
@@ -99,7 +108,8 @@ class FragmentFD(FileDownloader):
                 state['eta'] = self.calc_eta(
                     start, time_now, estimated_size,
                     state['downloaded_bytes'])
-                state['speed'] = s.get('speed')
+                state['speed'] = s.get('speed') or ctx.get('speed')
+                ctx['speed'] = state['speed']
             ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
             self._hook_progress(state)

youtube_dl/downloader/hls.py

@@ -1,87 +1,19 @@
 from __future__ import unicode_literals

-import os
+import os.path
 import re
-import subprocess
-import sys

-from .common import FileDownloader
 from .fragment import FragmentFD

 from ..compat import compat_urlparse
-from ..postprocessor.ffmpeg import FFmpegPostProcessor
 from ..utils import (
-    encodeArgument,
     encodeFilename,
     sanitize_open,
-    handle_youtubedl_headers,
 )


-class HlsFD(FileDownloader):
-    def real_download(self, filename, info_dict):
-        url = info_dict['url']
-        self.report_destination(filename)
-        tmpfilename = self.temp_name(filename)
-
-        ffpp = FFmpegPostProcessor(downloader=self)
-        if not ffpp.available:
-            self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
-            return False
-        ffpp.check_version()
-
-        args = [ffpp.executable, '-y']
-
-        if info_dict['http_headers'] and re.match(r'^https?://', url):
-            # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
-            # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
-            headers = handle_youtubedl_headers(info_dict['http_headers'])
-            args += [
-                '-headers',
-                ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
-
-        args += ['-i', url, '-c', 'copy']
-        if self.params.get('hls_use_mpegts', False):
-            args += ['-f', 'mpegts']
-        else:
-            args += ['-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
-
-        args = [encodeArgument(opt) for opt in args]
-        args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
-
-        self._debug_cmd(args)
-
-        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
-        try:
-            retval = proc.wait()
-        except KeyboardInterrupt:
-            # subprocces.run would send the SIGKILL signal to ffmpeg and the
-            # mp4 file couldn't be played, but if we ask ffmpeg to quit it
-            # produces a file that is playable (this is mostly useful for live
-            # streams). Note that Windows is not affected and produces playable
-            # files (see https://github.com/rg3/youtube-dl/issues/8300).
-            if sys.platform != 'win32':
-                proc.communicate(b'q')
-            raise
-        if retval == 0:
-            fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
-            self.try_rename(tmpfilename, filename)
-            self._hook_progress({
-                'downloaded_bytes': fsize,
-                'total_bytes': fsize,
-                'filename': filename,
-                'status': 'finished',
-            })
-            return True
-        else:
-            self.to_stderr('\n')
-            self.report_error('%s exited with code %d' % (ffpp.basename, retval))
-            return False
-
-
-class NativeHlsFD(FragmentFD):
-    """ A more limited implementation that does not require ffmpeg """
+class HlsFD(FragmentFD):
+    """ A limited implementation that does not require ffmpeg """

     FD_NAME = 'hlsnative'
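The ffmpeg code path removed here is not lost: it reappears almost verbatim as FFmpegFD._call_downloader in downloader/external.py above, while the remaining HlsFD (formerly NativeHlsFD) is the pure-Python fragment downloader registered as 'hlsnative'. A quick check against this commit's layout (hedged):

    from youtube_dl.downloader.fragment import FragmentFD
    from youtube_dl.downloader.hls import HlsFD

    assert issubclass(HlsFD, FragmentFD)
    print(HlsFD.FD_NAME)  # 'hlsnative'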

youtube_dl/extractor/__init__.py

@@ -23,7 +23,10 @@ from .alphaporno import AlphaPornoIE
 from .animeondemand import AnimeOnDemandIE
 from .anitube import AnitubeIE
 from .anysex import AnySexIE
-from .aol import AolIE
+from .aol import (
+    AolIE,
+    AolFeaturesIE,
+)
 from .allocine import AllocineIE
 from .aparat import AparatIE
 from .appleconnect import AppleConnectIE
@@ -51,6 +54,7 @@ from .arte import (
 from .atresplayer import AtresPlayerIE
 from .atttechchannel import ATTTechChannelIE
 from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
 from .audiomack import AudiomackIE, AudiomackAlbumIE
 from .azubu import AzubuIE, AzubuLiveIE
 from .baidu import BaiduVideoIE
@@ -68,6 +72,7 @@ from .bet import BetIE
 from .bigflix import BigflixIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
+from .biobiochiletv import BioBioChileTVIE
 from .bleacherreport import (
     BleacherReportIE,
     BleacherReportCMSIE,
@@ -77,6 +82,7 @@ from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bpb import BpbIE
 from .br import BRIE
+from .bravotv import BravoTVIE
 from .breakcom import BreakIE
 from .brightcove import (
     BrightcoveLegacyIE,
@@ -103,6 +109,7 @@ from .cbsnews import (
 )
 from .cbssports import CBSSportsIE
 from .ccc import CCCIE
+from .cda import CDAIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chaturbate import ChaturbateIE
@@ -131,6 +138,7 @@ from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import RtmpIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
@@ -185,6 +193,10 @@ from .dumpert import DumpertIE
 from .defense import DefenseGouvFrIE
 from .discovery import DiscoveryIE
 from .dropbox import DropboxIE
+from .dw import (
+    DWIE,
+    DWArticleIE,
+)
 from .eagleplatform import EaglePlatformIE
 from .ebaumsworld import EbaumsWorldIE
 from .echomsk import EchoMskIE
@@ -209,10 +221,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
 from .expotv import ExpoTVIE
 from .extremetube import ExtremeTubeIE
-from .facebook import (
+from .facebook import FacebookIE
FacebookIE,
FacebookPostIE,
)
from .faz import FazIE from .faz import FazIE
from .fc2 import FC2IE from .fc2 import FC2IE
from .fczenit import FczenitIE from .fczenit import FczenitIE
@ -277,6 +286,7 @@ from .goshgay import GoshgayIE
from .gputechconf import GPUTechConfIE from .gputechconf import GPUTechConfIE
from .groupon import GrouponIE from .groupon import GrouponIE
from .hark import HarkIE from .hark import HarkIE
from .hbo import HBOIE
from .hearthisat import HearThisAtIE from .hearthisat import HearThisAtIE
from .heise import HeiseIE from .heise import HeiseIE
from .hellporno import HellPornoIE from .hellporno import HellPornoIE
@ -340,6 +350,7 @@ from .konserthusetplay import KonserthusetPlayIE
from .kontrtube import KontrTubeIE from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE from .krasview import KrasViewIE
from .ku6 import Ku6IE from .ku6 import Ku6IE
from .kusi import KUSIIE
from .kuwo import ( from .kuwo import (
KuwoIE, KuwoIE,
KuwoAlbumIE, KuwoAlbumIE,
@ -383,6 +394,7 @@ from .lynda import (
from .m6 import M6IE from .m6 import M6IE
from .macgamestore import MacGameStoreIE from .macgamestore import MacGameStoreIE
from .mailru import MailRuIE from .mailru import MailRuIE
from .makerschannel import MakersChannelIE
from .makertv import MakerTVIE from .makertv import MakerTVIE
from .malemotion import MalemotionIE from .malemotion import MalemotionIE
from .matchtv import MatchTVIE from .matchtv import MatchTVIE
@ -392,11 +404,13 @@ from .metacritic import MetacriticIE
from .mgoon import MgoonIE from .mgoon import MgoonIE
from .minhateca import MinhatecaIE from .minhateca import MinhatecaIE
from .ministrygrid import MinistryGridIE from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE from .miomio import MioMioIE
from .mit import TechTVMITIE, MITIE, OCWMITIE from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mitele import MiTeleIE from .mitele import MiTeleIE
from .mixcloud import MixcloudIE from .mixcloud import MixcloudIE
from .mlb import MLBIE from .mlb import MLBIE
from .mnet import MnetIE
from .mpora import MporaIE from .mpora import MporaIE
from .moevideo import MoeVideoIE from .moevideo import MoeVideoIE
from .mofosex import MofosexIE from .mofosex import MofosexIE
@ -522,6 +536,7 @@ from .ooyala import (
OoyalaIE, OoyalaIE,
OoyalaExternalIE, OoyalaExternalIE,
) )
from .openload import OpenloadIE
from .ora import OraTVIE from .ora import OraTVIE
from .orf import ( from .orf import (
ORFTVthekIE, ORFTVthekIE,
@ -590,6 +605,7 @@ from .regiotv import RegioTVIE
from .restudy import RestudyIE from .restudy import RestudyIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
from .revision3 import Revision3IE from .revision3 import Revision3IE
from .rice import RICEIE
from .ringtv import RingTVIE from .ringtv import RingTVIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
@ -616,6 +632,7 @@ from .ruutu import RuutuIE
from .sandia import SandiaIE from .sandia import SandiaIE
from .safari import ( from .safari import (
SafariIE, SafariIE,
SafariApiIE,
SafariCourseIE, SafariCourseIE,
) )
from .sapo import SapoIE from .sapo import SapoIE
@ -727,7 +744,9 @@ from .theplatform import (
ThePlatformIE, ThePlatformIE,
ThePlatformFeedIE, ThePlatformFeedIE,
) )
from .thescene import TheSceneIE
from .thesixtyone import TheSixtyOneIE from .thesixtyone import TheSixtyOneIE
from .thestar import TheStarIE
from .thisamericanlife import ThisAmericanLifeIE from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .tinypic import TinyPicIE from .tinypic import TinyPicIE
@ -774,6 +793,7 @@ from .tv2 import (
TV2IE, TV2IE,
TV2ArticleIE, TV2ArticleIE,
) )
from .tv3 import TV3IE
from .tv4 import TV4IE from .tv4 import TV4IE
from .tvc import ( from .tvc import (
TVCIE, TVCIE,
@ -813,6 +833,7 @@ from .udn import UDNEmbedIE
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
from .unistra import UnistraIE from .unistra import UnistraIE
from .urort import UrortIE from .urort import UrortIE
from .usatoday import USATodayIE
from .ustream import UstreamIE, UstreamChannelIE from .ustream import UstreamIE, UstreamChannelIE
from .ustudio import UstudioIE from .ustudio import UstudioIE
from .varzesh3 import Varzesh3IE from .varzesh3 import Varzesh3IE
@ -828,7 +849,10 @@ from .vgtv import (
VGTVIE, VGTVIE,
) )
from .vh1 import VH1IE from .vh1 import VH1IE
from .vice import ViceIE from .vice import (
ViceIE,
ViceShowIE,
)
from .viddler import ViddlerIE from .viddler import ViddlerIE
from .videodetective import VideoDetectiveIE from .videodetective import VideoDetectiveIE
from .videofyme import VideofyMeIE from .videofyme import VideofyMeIE
@ -855,6 +879,7 @@ from .vimeo import (
VimeoChannelIE, VimeoChannelIE,
VimeoGroupsIE, VimeoGroupsIE,
VimeoLikesIE, VimeoLikesIE,
VimeoOndemandIE,
VimeoReviewIE, VimeoReviewIE,
VimeoUserIE, VimeoUserIE,
VimeoWatchLaterIE, VimeoWatchLaterIE,
@ -936,7 +961,9 @@ from .youtube import (
YoutubeChannelIE, YoutubeChannelIE,
YoutubeFavouritesIE, YoutubeFavouritesIE,
YoutubeHistoryIE, YoutubeHistoryIE,
YoutubeLiveIE,
YoutubePlaylistIE, YoutubePlaylistIE,
YoutubePlaylistsIE,
YoutubeRecommendedIE, YoutubeRecommendedIE,
YoutubeSearchDateIE, YoutubeSearchDateIE,
YoutubeSearchIE, YoutubeSearchIE,
@ -946,7 +973,6 @@ from .youtube import (
YoutubeTruncatedIDIE, YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE, YoutubeTruncatedURLIE,
YoutubeUserIE, YoutubeUserIE,
YoutubePlaylistsIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
) )
from .zapiks import ZapiksIE from .zapiks import ZapiksIE
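All of the additions in this file follow one pattern: a new extractor only needs an import here, because the core then probes every IE class's _VALID_URL against the input URL. A minimal sketch of that dispatch, reusing the AudioBoom pattern added by this commit (class and list names are illustrative):

    import re

    class SketchIE(object):
        _VALID_URL = None

        @classmethod
        def suitable(cls, url):
            return re.match(cls._VALID_URL, url) is not None

    class AudioBoomSketchIE(SketchIE):
        _VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'

    _ALL_IES = [AudioBoomSketchIE]  # the imports above populate the real list

    def find_suitable(url):
        for ie in _ALL_IES:
            if ie.suitable(url):
                return ie

    print(find_suitable('https://audioboom.com/boos/4279833-czaban-hour-3'))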

View File

@ -12,7 +12,7 @@ from ..utils import (
class ABCIE(InfoExtractor): class ABCIE(InfoExtractor):
IE_NAME = 'abc.net.au' IE_NAME = 'abc.net.au'
_VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)' _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
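This is the first of many hunks in the commit that relax _VALID_URL from http:// to https?://. A quick demonstration of what the relaxed pattern accepts (the slug is made up):

    import re

    _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'

    for url in ('http://www.abc.net.au/news/2014-11-05/some-slug/5868334',
                'https://www.abc.net.au/news/2014-11-05/some-slug/5868334'):
        m = re.match(_VALID_URL, url)
        print(url, '->', m.group('id') if m else 'no match')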

View File

@ -16,7 +16,7 @@ from ..utils import (
class AddAnimeIE(InfoExtractor): class AddAnimeIE(InfoExtractor):
_VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)' _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
'md5': '72954ea10bc979ab5e2eb288b21425a0', 'md5': '72954ea10bc979ab5e2eb288b21425a0',

View File

@ -6,7 +6,7 @@ from ..utils import int_or_none
class AftonbladetIE(InfoExtractor): class AftonbladetIE(InfoExtractor):
_VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)' _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://tv.aftonbladet.se/abtv/articles/36015', 'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
'info_dict': { 'info_dict': {

View File

@ -4,7 +4,7 @@ from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor): class AlJazeeraIE(InfoExtractor):
_VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html' _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
@ -13,24 +13,18 @@ class AlJazeeraIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Slum - Episode 1: Deliverance', 'title': 'The Slum - Episode 1: Deliverance',
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.', 'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
'uploader': 'Al Jazeera English', 'uploader_id': '665003303001',
'timestamp': 1411116829,
'upload_date': '20140919',
}, },
'add_ie': ['BrightcoveLegacy'], 'add_ie': ['BrightcoveNew'],
'skip': 'Not accessible from Travis CI server', 'skip': 'Not accessible from Travis CI server',
} }
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
def _real_extract(self, url): def _real_extract(self, url):
program_name = self._match_id(url) program_name = self._match_id(url)
webpage = self._download_webpage(url, program_name) webpage = self._download_webpage(url, program_name)
brightcove_id = self._search_regex( brightcove_id = self._search_regex(
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id') r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
return {
'_type': 'url',
'url': (
'brightcove:'
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
'&%40videoPlayer={0}'.format(brightcove_id)
),
'ie_key': 'BrightcoveLegacy',
}
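The rewrite above collapses a hand-built '_type': 'url' dict into a single self.url_result() call. A sketch of roughly what that helper produces (simplified; the real one lives in extractor/common.py):

    def url_result(url, ie_key=None, video_id=None):
        # Delegation result: the core re-dispatches this URL to the named IE.
        result = {'_type': 'url', 'url': url, 'ie_key': ie_key}
        if video_id is not None:
            result['id'] = video_id
        return result

    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
    print(url_result(BRIGHTCOVE_URL_TEMPLATE % '123', 'BrightcoveNew', '123'))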

View File

@ -3,10 +3,14 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import (
compat_urlparse,
compat_str,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
encode_dict, encode_dict,
extract_attributes,
ExtractorError, ExtractorError,
sanitized_Request, sanitized_Request,
urlencode_postdata, urlencode_postdata,
@ -18,7 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
_LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in' _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
_APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply' _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
_NETRC_MACHINE = 'animeondemand' _NETRC_MACHINE = 'animeondemand'
_TEST = { _TESTS = [{
'url': 'https://www.anime-on-demand.de/anime/161', 'url': 'https://www.anime-on-demand.de/anime/161',
'info_dict': { 'info_dict': {
'id': '161', 'id': '161',
@ -26,7 +30,19 @@ class AnimeOnDemandIE(InfoExtractor):
'description': 'md5:6681ce3c07c7189d255ac6ab23812d31', 'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
}, },
'playlist_mincount': 4, 'playlist_mincount': 4,
} }, {
# Film wording is used instead of Episode
'url': 'https://www.anime-on-demand.de/anime/39',
'only_matching': True,
}, {
# Episodes without titles
'url': 'https://www.anime-on-demand.de/anime/162',
'only_matching': True,
}, {
# ger/jap, Dub/OmU, account required
'url': 'https://www.anime-on-demand.de/anime/169',
'only_matching': True,
}]
def _login(self): def _login(self):
(username, password) = self._get_login_info() (username, password) = self._get_login_info()
@ -36,6 +52,10 @@ class AnimeOnDemandIE(InfoExtractor):
login_page = self._download_webpage( login_page = self._download_webpage(
self._LOGIN_URL, None, 'Downloading login page') self._LOGIN_URL, None, 'Downloading login page')
if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
self.raise_geo_restricted(
'%s is only available in German-speaking countries of Europe' % self.IE_NAME)
login_form = self._form_hidden_inputs('new_user', login_page) login_form = self._form_hidden_inputs('new_user', login_page)
login_form.update({ login_form.update({
@ -91,14 +111,22 @@ class AnimeOnDemandIE(InfoExtractor):
entries = [] entries = []
for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage): for num, episode_html in enumerate(re.findall(
m = re.search( r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html) episodebox_title = self._search_regex(
if not m: (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
episode_html, 'episodebox title', default=None, group='title')
if not episodebox_title:
continue continue
episode_number = int(m.group('number')) episode_number = int(self._search_regex(
episode_title = m.group('title') r'(?:Episode|Film)\s*(\d+)',
episodebox_title, 'episode number', default=num))
episode_title = self._search_regex(
r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
episodebox_title, 'episode title', default=None)
video_id = 'episode-%d' % episode_number video_id = 'episode-%d' % episode_number
common_info = { common_info = {
@ -110,33 +138,86 @@ class AnimeOnDemandIE(InfoExtractor):
formats = [] formats = []
playlist_url = self._search_regex( for input_ in re.findall(
r'data-playlist=(["\'])(?P<url>.+?)\1', r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
episode_html, 'data playlist', default=None, group='url') attributes = extract_attributes(input_)
if playlist_url: playlist_urls = []
request = sanitized_Request( for playlist_key in ('data-playlist', 'data-otherplaylist'):
compat_urlparse.urljoin(url, playlist_url), playlist_url = attributes.get(playlist_key)
headers={ if isinstance(playlist_url, compat_str) and re.match(
'X-Requested-With': 'XMLHttpRequest', r'/?[\da-zA-Z]+', playlist_url):
'X-CSRF-Token': csrf_token, playlist_urls.append(attributes[playlist_key])
'Referer': url, if not playlist_urls:
'Accept': 'application/json, text/javascript, */*; q=0.01', continue
})
playlist = self._download_json( lang = attributes.get('data-lang')
request, video_id, 'Downloading playlist JSON', fatal=False) lang_note = attributes.get('value')
if playlist:
playlist = playlist['playlist'][0] for playlist_url in playlist_urls:
title = playlist['title'] kind = self._search_regex(
r'videomaterialurl/\d+/([^/]+)/',
playlist_url, 'media kind', default=None)
format_id_list = []
if lang:
format_id_list.append(lang)
if kind:
format_id_list.append(kind)
if not format_id_list:
format_id_list.append(compat_str(num))
format_id = '-'.join(format_id_list)
format_note = ', '.join(filter(None, (kind, lang_note)))
request = sanitized_Request(
compat_urlparse.urljoin(url, playlist_url),
headers={
'X-Requested-With': 'XMLHttpRequest',
'X-CSRF-Token': csrf_token,
'Referer': url,
'Accept': 'application/json, text/javascript, */*; q=0.01',
})
playlist = self._download_json(
request, video_id, 'Downloading %s playlist JSON' % format_id,
fatal=False)
if not playlist:
continue
start_video = playlist.get('startvideo', 0)
playlist = playlist.get('playlist')
if not playlist or not isinstance(playlist, list):
continue
playlist = playlist[start_video]
title = playlist.get('title')
if not title:
continue
description = playlist.get('description') description = playlist.get('description')
for source in playlist.get('sources', []): for source in playlist.get('sources', []):
file_ = source.get('file') file_ = source.get('file')
if file_ and determine_ext(file_) == 'm3u8': if not file_:
formats = self._extract_m3u8_formats( continue
ext = determine_ext(file_)
format_id_list = [lang, kind]
if ext == 'm3u8':
format_id_list.append('hls')
elif source.get('type') == 'video/dash' or ext == 'mpd':
format_id_list.append('dash')
format_id = '-'.join(filter(None, format_id_list))
if ext == 'm3u8':
file_formats = self._extract_m3u8_formats(
file_, video_id, 'mp4', file_, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls') entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
elif source.get('type') == 'video/dash' or ext == 'mpd':
continue
file_formats = self._extract_mpd_formats(
file_, video_id, mpd_id=format_id, fatal=False)
else:
continue
for f in file_formats:
f.update({
'language': lang,
'format_note': format_note,
})
formats.extend(file_formats)
if formats: if formats:
self._sort_formats(formats)
f = common_info.copy() f = common_info.copy()
f.update({ f.update({
'title': title, 'title': title,
@ -145,16 +226,18 @@ class AnimeOnDemandIE(InfoExtractor):
}) })
entries.append(f) entries.append(f)
m = re.search( # Extract teaser only when full episode is not available
r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<', if not formats:
episode_html) m = re.search(
if m: r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
f = common_info.copy() episode_html)
f.update({ if m:
'id': '%s-teaser' % f['id'], f = common_info.copy()
'title': m.group('title'), f.update({
'url': compat_urlparse.urljoin(url, m.group('href')), 'id': '%s-teaser' % f['id'],
}) 'title': m.group('title'),
entries.append(f) 'url': compat_urlparse.urljoin(url, m.group('href')),
})
entries.append(f)
return self.playlist_result(entries, anime_id, anime_title, anime_description) return self.playlist_result(entries, anime_id, anime_title, anime_description)
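The new format_id scheme above joins whatever identifying parts are available (language, kind, protocol) and drops the empty ones. In isolation:

    def make_format_id(*parts):
        return '-'.join(filter(None, parts))

    print(make_format_id('ger', 'dub', 'hls'))   # -> 'ger-dub-hls'
    print(make_format_id(None, 'omu', 'dash'))   # -> 'omu-dash'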

View File

@ -1,24 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
class AolIE(InfoExtractor): class AolIE(InfoExtractor):
IE_NAME = 'on.aol.com' IE_NAME = 'on.aol.com'
_VALID_URL = r'''(?x) _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
(?:
aol-video:|
http://on\.aol\.com/
(?:
video/.*-|
playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
)
)
(?P<id>[0-9]+)
(?:$|\?)
'''
_TESTS = [{ _TESTS = [{
'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img', 'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@ -29,42 +16,31 @@ class AolIE(InfoExtractor):
'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam', 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
}, },
'add_ie': ['FiveMin'], 'add_ie': ['FiveMin'],
}, {
'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
'info_dict': {
'id': '152147',
'title': 'Brace Yourself - Today\'s Weirdest News',
},
'playlist_mincount': 10,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id') return self.url_result('5min:%s' % video_id)
playlist_id = mobj.group('playlist_id')
if not playlist_id or self._downloader.params.get('noplaylist'):
return self.url_result('5min:%s' % video_id)
self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
webpage = self._download_webpage(url, playlist_id) class AolFeaturesIE(InfoExtractor):
title = self._html_search_regex( IE_NAME = 'features.aol.com'
r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title') _VALID_URL = r'https?://features\.aol\.com/video/(?P<id>[^/?#]+)'
playlist_html = self._search_regex(
r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
'playlist HTML')
entries = [{
'_type': 'url',
'url': 'aol-video:%s' % m.group('id'),
'ie_key': 'Aol',
} for m in re.finditer(
r"<a\s+href='.*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
playlist_html)]
return { _TESTS = [{
'_type': 'playlist', 'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',
'id': playlist_id, 'md5': '7db483bb0c09c85e241f84a34238cc75',
'display_id': mobj.group('playlist_display_id'), 'info_dict': {
'title': title, 'id': '519507715',
'entries': entries, 'ext': 'mp4',
} 'title': 'What To Watch - February 17, 2016',
},
'add_ie': ['FiveMin'],
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
return self.url_result(self._search_regex(
r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
webpage, '5min embed url'), 'FiveMin')
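The new AolFeaturesIE simply locates the 5min PlayerSeed embed and delegates to the FiveMin extractor. A sketch of that lookup against illustrative markup (the script URL below is a made-up example, not a captured page):

    import re

    webpage = ('<script type="text/javascript" '
               'src="http://pshared.5min.com/Scripts/PlayerSeed.js?playList=519507715">'
               '</script>')
    embed_url = re.search(
        r'<script type="text/javascript" src="(https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js[^"]+)"',
        webpage).group(1)
    print(embed_url)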

View File

@ -23,7 +23,7 @@ from ..utils import (
class ArteTvIE(InfoExtractor): class ArteTvIE(InfoExtractor):
_VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html' _VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
IE_NAME = 'arte.tv' IE_NAME = 'arte.tv'
def _real_extract(self, url): def _real_extract(self, url):
@ -121,15 +121,18 @@ class ArteTVPlus7IE(InfoExtractor):
json_url = compat_parse_qs( json_url = compat_parse_qs(
compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0]
if json_url: if json_url:
return self._extract_from_json_url(json_url, video_id, lang) title = self._search_regex(
# Differend kind of embed URL (e.g. r'<h3[^>]+title=(["\'])(?P<title>.+?)\1',
webpage, 'title', default=None, group='title')
return self._extract_from_json_url(json_url, video_id, lang, title=title)
# Different kind of embed URL (e.g.
# http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium)
embed_url = self._search_regex( embed_url = self._search_regex(
r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1',
webpage, 'embed url', group='url') webpage, 'embed url', group='url')
return self.url_result(embed_url) return self.url_result(embed_url)
def _extract_from_json_url(self, json_url, video_id, lang): def _extract_from_json_url(self, json_url, video_id, lang, title=None):
info = self._download_json(json_url, video_id) info = self._download_json(json_url, video_id)
player_info = info['videoJsonPlayer'] player_info = info['videoJsonPlayer']
@ -137,7 +140,7 @@ class ArteTVPlus7IE(InfoExtractor):
if not upload_date_str: if not upload_date_str:
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
title = player_info['VTI'].strip() title = (player_info.get('VTI') or title or player_info['VID']).strip()
subtitle = player_info.get('VSU', '').strip() subtitle = player_info.get('VSU', '').strip()
if subtitle: if subtitle:
title += ' - %s' % subtitle title += ' - %s' % subtitle
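The new title logic prefers the JSON player title, then the page title passed in from the caller, then the bare video id. In isolation:

    def pick_title(player_info, page_title=None):
        return (player_info.get('VTI') or page_title or player_info['VID']).strip()

    print(pick_title({'VTI': 'Trepalium (1/6)', 'VID': '043686-001-A'}))
    print(pick_title({'VID': '043686-001-A'}, page_title='Trepalium'))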

View File

@ -10,9 +10,9 @@ from ..utils import (
class AudiMediaIE(InfoExtractor): class AudiMediaIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audimedia\.tv/(?:en|de)/vid/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?P<id>[^/?#]+)'
_TEST = { _TEST = {
'url': 'https://audimedia.tv/en/vid/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test', 'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
'md5': '79a8b71c46d49042609795ab59779b66', 'md5': '79a8b71c46d49042609795ab59779b66',
'info_dict': { 'info_dict': {
'id': '1565', 'id': '1565',
@ -32,7 +32,10 @@ class AudiMediaIE(InfoExtractor):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
raw_payload = self._search_regex(r'<script[^>]+class="amtv-embed"[^>]+id="([^"]+)"', webpage, 'raw payload') raw_payload = self._search_regex([
r'class="amtv-embed"[^>]+id="([^"]+)"',
r'class=\\"amtv-embed\\"[^>]+id=\\"([^"]+)\\"',
], webpage, 'raw payload')
_, stage_mode, video_id, lang = raw_payload.split('-') _, stage_mode, video_id, lang = raw_payload.split('-')
# TODO: handle s and e stage_mode (live streams and ended live streams) # TODO: handle s and e stage_mode (live streams and ended live streams)
@ -59,13 +62,19 @@ class AudiMediaIE(InfoExtractor):
video_version_url = video_version.get('download_url') or video_version.get('stream_url') video_version_url = video_version.get('download_url') or video_version.get('stream_url')
if not video_version_url: if not video_version_url:
continue continue
formats.append({ f = {
'url': video_version_url, 'url': video_version_url,
'width': int_or_none(video_version.get('width')), 'width': int_or_none(video_version.get('width')),
'height': int_or_none(video_version.get('height')), 'height': int_or_none(video_version.get('height')),
'abr': int_or_none(video_version.get('audio_bitrate')), 'abr': int_or_none(video_version.get('audio_bitrate')),
'vbr': int_or_none(video_version.get('video_bitrate')), 'vbr': int_or_none(video_version.get('video_bitrate')),
}) }
bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
if bitrate:
f.update({
'format_id': 'http-%s' % bitrate,
})
formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)
return { return {
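The hunk above derives an HTTP format_id from the bitrate embedded in the stream URL. A sketch against a made-up URL:

    import re

    video_version_url = 'http://example.com/video/av_1565_en_2700k.mp4'  # illustrative
    bitrate = re.search(r'(\d+)k', video_version_url)
    if bitrate:
        print('http-%s' % bitrate.group(1))  # -> http-2700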

View File

@ -0,0 +1,66 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import float_or_none
class AudioBoomIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?audioboom\.com/boos/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
'info_dict': {
'id': '4279833',
'ext': 'mp3',
'title': '3/09/2016 Czaban Hour 3',
'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
'duration': 2245.72,
'uploader': 'Steve Czaban',
'uploader_url': 're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
clip = None
clip_store = self._parse_json(
self._search_regex(
r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
webpage, 'clip store', default='{}', group='json'),
video_id, fatal=False)
if clip_store:
clips = clip_store.get('clips')
if clips and isinstance(clips, list) and isinstance(clips[0], dict):
clip = clips[0]
def from_clip(field):
if clip:
return clip.get(field)
audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
'audio', webpage, 'audio url')
title = from_clip('title') or self._og_search_title(webpage)
description = from_clip('description') or self._og_search_description(webpage)
duration = float_or_none(from_clip('duration') or self._html_search_meta(
'weibo:audio:duration', webpage))
uploader = from_clip('author') or self._og_search_property(
'audio:artist', webpage, 'uploader', fatal=False)
uploader_url = from_clip('author_url') or self._html_search_meta(
'audioboo:channel', webpage, 'uploader url')
return {
'id': video_id,
'url': audio_url,
'title': title,
'description': description,
'duration': duration,
'uploader': uploader,
'uploader_url': uploader_url,
}
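The new extractor pulls its metadata from JSON embedded in a data attribute and only falls back to Open Graph tags when that fails. A sketch of the first step (the markup is illustrative, not a captured audioboom.com page):

    import json
    import re

    webpage = ('<div data-new-clip-store=\'{"clips": [{"clipId": 4279833, '
               '"title": "Czaban Hour 3"}]}\'></div>')
    raw = re.search(
        r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*4279833.*?})\1',
        webpage).group('json')
    clip = json.loads(raw)['clips'][0]
    print(clip['title'])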

View File

@ -98,7 +98,7 @@ class AzubuIE(InfoExtractor):
class AzubuLiveIE(InfoExtractor): class AzubuLiveIE(InfoExtractor):
_VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$' _VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'
_TEST = { _TEST = {
'url': 'http://www.azubu.tv/MarsTVMDLen', 'url': 'http://www.azubu.tv/MarsTVMDLen',

View File

@ -9,7 +9,7 @@ from ..utils import unescapeHTML
class BaiduVideoIE(InfoExtractor): class BaiduVideoIE(InfoExtractor):
IE_DESC = '百度视频' IE_DESC = '百度视频'
_VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm' _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
_TESTS = [{ _TESTS = [{
'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6', 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
'info_dict': { 'info_dict': {

View File

@ -10,7 +10,6 @@ from ..utils import (
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
remove_end,
unescapeHTML, unescapeHTML,
) )
from ..compat import ( from ..compat import (
@ -561,7 +560,7 @@ class BBCIE(BBCCoUkIE):
'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460', 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
'info_dict': { 'info_dict': {
'id': '3662a707-0af9-3149-963f-47bea720b460', 'id': '3662a707-0af9-3149-963f-47bea720b460',
'title': 'BBC Blogs - Adam Curtis - BUGGER', 'title': 'BUGGER',
}, },
'playlist_count': 18, 'playlist_count': 18,
}, { }, {
@ -670,9 +669,17 @@ class BBCIE(BBCCoUkIE):
'url': 'http://www.bbc.com/sport/0/football/34475836', 'url': 'http://www.bbc.com/sport/0/football/34475836',
'info_dict': { 'info_dict': {
'id': '34475836', 'id': '34475836',
'title': 'What Liverpool can expect from Klopp', 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
}, },
'playlist_count': 3, 'playlist_count': 3,
}, {
# school report article with single video
'url': 'http://www.bbc.co.uk/schoolreport/35744779',
'info_dict': {
'id': '35744779',
'title': 'School which breaks down barriers in Jerusalem',
},
'playlist_count': 1,
}, { }, {
# single video with playlist URL from weather section # single video with playlist URL from weather section
'url': 'http://www.bbc.com/weather/features/33601775', 'url': 'http://www.bbc.com/weather/features/33601775',
@ -735,8 +742,17 @@ class BBCIE(BBCCoUkIE):
json_ld_info = self._search_json_ld(webpage, playlist_id, default=None) json_ld_info = self._search_json_ld(webpage, playlist_id, default=None)
timestamp = json_ld_info.get('timestamp') timestamp = json_ld_info.get('timestamp')
playlist_title = json_ld_info.get('title') playlist_title = json_ld_info.get('title')
playlist_description = json_ld_info.get('description') if not playlist_title:
playlist_title = self._og_search_title(
webpage, default=None) or self._html_search_regex(
r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
if playlist_title:
playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
playlist_description = json_ld_info.get(
'description') or self._og_search_description(webpage, default=None)
if not timestamp: if not timestamp:
timestamp = parse_iso8601(self._search_regex( timestamp = parse_iso8601(self._search_regex(
@ -797,8 +813,6 @@ class BBCIE(BBCCoUkIE):
playlist.get('progressiveDownloadUrl'), playlist_id, timestamp)) playlist.get('progressiveDownloadUrl'), playlist_id, timestamp))
if entries: if entries:
playlist_title = playlist_title or remove_end(self._og_search_title(webpage), ' - BBC News')
playlist_description = playlist_description or self._og_search_description(webpage, default=None)
return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
# single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
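The new fallback chain above also strips a trailing ' - BBC ...' suffix from whichever title source won. In isolation:

    import re

    def clean_title(title):
        return re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', title).strip()

    print(clean_title('What Liverpool can expect from Klopp - BBC Sport'))
    print(clean_title('BUGGER'))  # no suffix: unchanged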
@ -829,10 +843,6 @@ class BBCIE(BBCCoUkIE):
'subtitles': subtitles, 'subtitles': subtitles,
} }
playlist_title = self._html_search_regex(
r'<title>(.*?)(?:\s*-\s*BBC [^ ]+)?</title>', webpage, 'playlist title')
playlist_description = self._og_search_description(webpage, default=None)
def extract_all(pattern): def extract_all(pattern):
return list(filter(None, map( return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False), lambda s: self._parse_json(s, playlist_id, fatal=False),
@ -932,7 +942,7 @@ class BBCIE(BBCCoUkIE):
class BBCCoUkArticleIE(InfoExtractor): class BBCCoUkArticleIE(InfoExtractor):
_VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
IE_NAME = 'bbc.co.uk:article' IE_NAME = 'bbc.co.uk:article'
IE_DESC = 'BBC articles' IE_DESC = 'BBC articles'

View File

@ -8,7 +8,7 @@ from ..utils import url_basename
class BehindKinkIE(InfoExtractor): class BehindKinkIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)' _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
_TEST = { _TEST = {
'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
'md5': '507b57d8fdcd75a41a9a7bdb7989c762', 'md5': '507b57d8fdcd75a41a9a7bdb7989c762',

View File

@ -14,7 +14,7 @@ from ..utils import (
class BiliBiliIE(InfoExtractor): class BiliBiliIE(InfoExtractor):
_VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?' _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.bilibili.tv/video/av1074402/', 'url': 'http://www.bilibili.tv/video/av1074402/',

View File

@ -0,0 +1,86 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import remove_end
class BioBioChileTVIE(InfoExtractor):
_VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
_TESTS = [{
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
'md5': '26f51f03cf580265defefb4518faec09',
'info_dict': {
'id': 'sobre-camaras-y-camarillas-parlamentarias',
'ext': 'mp4',
'title': 'Sobre Cámaras y camarillas parlamentarias',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'Fernando Atria',
},
}, {
# different uploader layout
'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
'info_dict': {
'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
'ext': 'mp4',
'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': 'Piangella Obrador',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
'only_matching': True,
}, {
'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
file_url = self._search_regex(
r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
webpage, 'file url', group='url')
base_url = self._search_regex(
r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
group='url')
formats = self._extract_m3u8_formats(
'%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
f = {
'url': '%s%s' % (base_url, file_url),
'format_id': 'http',
'protocol': 'http',
'preference': 1,
}
if formats:
f_copy = formats[-1].copy()
f_copy.update(f)
f = f_copy
formats.append(f)
self._sort_formats(formats)
thumbnail = self._og_search_thumbnail(webpage)
uploader = self._html_search_regex(
r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
webpage, 'uploader', fatal=False)
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'uploader': uploader,
'formats': formats,
}
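The new extractor overlays a progressive-HTTP format on top of one HLS variant's metadata, mirroring the f_copy dance above. A sketch (the URLs and numbers are illustrative):

    def http_format_from_hls(hls_formats, http_url):
        f = {'url': http_url, 'format_id': 'http', 'protocol': 'http', 'preference': 1}
        if hls_formats:
            merged = hls_formats[-1].copy()  # the diff borrows the last HLS variant
            merged.update(f)
            f = merged
        return f

    hls = [{'format_id': 'hls-564', 'height': 360, 'tbr': 564}]
    print(http_format_from_hls(hls, 'http://example.com/clip.mp4'))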

View File

@ -28,10 +28,10 @@ class BleacherReportIE(InfoExtractor):
'add_ie': ['Ooyala'], 'add_ie': ['Ooyala'],
}, { }, {
'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo', 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo',
'md5': 'af5f90dc9c7ba1c19d0a3eac806bbf50', 'md5': '6a5cd403418c7b01719248ca97fb0692',
'info_dict': { 'info_dict': {
'id': '2586817', 'id': '2586817',
'ext': 'mp4', 'ext': 'webm',
'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo', 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo',
'timestamp': 1446839961, 'timestamp': 1446839961,
'uploader': 'Sean Fay', 'uploader': 'Sean Fay',
@ -93,10 +93,14 @@ class BleacherReportCMSIE(AMPIE):
'md5': '8c2c12e3af7805152675446c905d159b', 'md5': '8c2c12e3af7805152675446c905d159b',
'info_dict': { 'info_dict': {
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
'ext': 'flv', 'ext': 'mp4',
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division', 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e', 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
}, },
'params': {
# m3u8 download
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -33,7 +33,7 @@ class BokeCCBaseIE(InfoExtractor):
class BokeCCIE(BokeCCBaseIE): class BokeCCIE(BokeCCBaseIE):
_IE_DESC = 'CC视频' _IE_DESC = 'CC视频'
_VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
_TESTS = [{ _TESTS = [{
'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B', 'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',

View File

@ -12,7 +12,7 @@ from ..utils import (
class BpbIE(InfoExtractor): class BpbIE(InfoExtractor):
IE_DESC = 'Bundeszentrale für politische Bildung' IE_DESC = 'Bundeszentrale für politische Bildung'
_VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/' _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
_TEST = { _TEST = {
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',

View File

@ -0,0 +1,28 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import smuggle_url
class BravoTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
_TEST = {
'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
'md5': 'd60cdf68904e854fac669bd26cccf801',
'info_dict': {
'id': 'LitrBdX64qLn',
'ext': 'mp4',
'title': 'Last Chance Kitchen Returns',
'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
return self.url_result(smuggle_url(
'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
{'force_smil_url': True}), 'ThePlatform', release_pid)

View File

@ -11,7 +11,7 @@ from ..utils import (
class BreakIE(InfoExtractor): class BreakIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
'info_dict': { 'info_dict': {

View File

@ -9,10 +9,10 @@ from ..compat import (
compat_etree_fromstring, compat_etree_fromstring,
compat_parse_qs, compat_parse_qs,
compat_str, compat_str,
compat_urllib_parse,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
compat_urlparse, compat_urlparse,
compat_xml_parse_error, compat_xml_parse_error,
compat_HTTPError,
) )
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
@ -23,16 +23,16 @@ from ..utils import (
js_to_json, js_to_json,
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
sanitized_Request,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
update_url_query,
) )
class BrightcoveLegacyIE(InfoExtractor): class BrightcoveLegacyIE(InfoExtractor):
IE_NAME = 'brightcove:legacy' IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)' _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
_TESTS = [ _TESTS = [
{ {
@ -155,8 +155,8 @@ class BrightcoveLegacyIE(InfoExtractor):
# Not all pages define this value # Not all pages define this value
if playerKey is not None: if playerKey is not None:
params['playerKey'] = playerKey params['playerKey'] = playerKey
# The three fields hold the id of the video # These fields hold the id of the video
videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
if videoPlayer is not None: if videoPlayer is not None:
params['@videoPlayer'] = videoPlayer params['@videoPlayer'] = videoPlayer
linkBase = find_param('linkBaseURL') linkBase = find_param('linkBaseURL')
@ -184,8 +184,7 @@ class BrightcoveLegacyIE(InfoExtractor):
@classmethod @classmethod
def _make_brightcove_url(cls, params): def _make_brightcove_url(cls, params):
data = compat_urllib_parse.urlencode(params) return update_url_query(cls._FEDERATED_URL, params)
return cls._FEDERATED_URL_TEMPLATE % data
@classmethod @classmethod
def _extract_brightcove_url(cls, webpage): def _extract_brightcove_url(cls, webpage):
@ -239,7 +238,7 @@ class BrightcoveLegacyIE(InfoExtractor):
# We set the original url as the default 'Referer' header # We set the original url as the default 'Referer' header
referer = smuggled_data.get('Referer', url) referer = smuggled_data.get('Referer', url)
return self._get_video_info( return self._get_video_info(
videoPlayer[0], query_str, query, referer=referer) videoPlayer[0], query, referer=referer)
elif 'playerKey' in query: elif 'playerKey' in query:
player_key = query['playerKey'] player_key = query['playerKey']
return self._get_playlist_info(player_key[0]) return self._get_playlist_info(player_key[0])
@ -248,15 +247,14 @@ class BrightcoveLegacyIE(InfoExtractor):
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?', 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
expected=True) expected=True)
def _get_video_info(self, video_id, query_str, query, referer=None): def _get_video_info(self, video_id, query, referer=None):
request_url = self._FEDERATED_URL_TEMPLATE % query_str headers = {}
req = sanitized_Request(request_url)
linkBase = query.get('linkBaseURL') linkBase = query.get('linkBaseURL')
if linkBase is not None: if linkBase is not None:
referer = linkBase[0] referer = linkBase[0]
if referer is not None: if referer is not None:
req.add_header('Referer', referer) headers['Referer'] = referer
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
error_msg = self._html_search_regex( error_msg = self._html_search_regex(
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage, r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
@ -355,7 +353,7 @@ class BrightcoveLegacyIE(InfoExtractor):
class BrightcoveNewIE(InfoExtractor): class BrightcoveNewIE(InfoExtractor):
IE_NAME = 'brightcove:new' IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>(?:ref:)?\d+)' _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be', 'md5': 'c8100925723840d4b0d243f7025703be',
@ -391,6 +389,10 @@ class BrightcoveNewIE(InfoExtractor):
# ref: prefixed video id # ref: prefixed video id
'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
'only_matching': True, 'only_matching': True,
}, {
# non numeric ref: prefixed video id
'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -410,8 +412,8 @@ class BrightcoveNewIE(InfoExtractor):
# Look for iframe embeds [1] # Look for iframe embeds [1]
for _, url in re.findall( for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
entries.append(url) entries.append(url if url.startswith('http') else 'http:' + url)
# Look for embed_in_page embeds [2] # Look for embed_in_page embeds [2]
for video_id, account_id, player_id, embed in re.findall( for video_id, account_id, player_id, embed in re.findall(
@ -420,11 +422,11 @@ class BrightcoveNewIE(InfoExtractor):
# According to [4] data-video-id may be prefixed with ref: # According to [4] data-video-id may be prefixed with ref:
r'''(?sx) r'''(?sx)
<video[^>]+ <video[^>]+
data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*? data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
</video>.*? </video>.*?
<script[^>]+ <script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/ src=["\'](?:https?:)?//players\.brightcove\.net/
(\d+)/([\da-f-]+)_([^/]+)/index\.min\.js (\d+)/([\da-f-]+)_([^/]+)/index(?:\.min)?\.js
''', webpage): ''', webpage):
entries.append( entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
@ -454,24 +456,33 @@ class BrightcoveNewIE(InfoExtractor):
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk') webpage, 'policy key', group='pk')
req = sanitized_Request( api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' try:
% (account_id, video_id), json_data = self._download_json(api_url, video_id, headers={
headers={'Accept': 'application/json;pk=%s' % policy_key}) 'Accept': 'application/json;pk=%s' % policy_key
json_data = self._download_json(req, video_id) })
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id)
raise ExtractorError(json_data[0]['message'], expected=True)
raise
title = json_data['name'] title = json_data['name']
formats = [] formats = []
for source in json_data.get('sources', []): for source in json_data.get('sources', []):
container = source.get('container')
source_type = source.get('type') source_type = source.get('type')
src = source.get('src') src = source.get('src')
if source_type == 'application/x-mpegURL': if source_type == 'application/x-mpegURL' or container == 'M2TS':
if not src: if not src:
continue continue
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native', src, video_id, 'mp4', m3u8_id='hls', fatal=False))
m3u8_id='hls', fatal=False)) elif source_type == 'application/dash+xml':
if not src:
continue
formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
else: else:
streaming_src = source.get('streaming_src') streaming_src = source.get('streaming_src')
stream_name, app_name = source.get('stream_name'), source.get('app_name') stream_name, app_name = source.get('stream_name'), source.get('app_name')
@ -479,15 +490,23 @@ class BrightcoveNewIE(InfoExtractor):
continue continue
tbr = float_or_none(source.get('avg_bitrate'), 1000) tbr = float_or_none(source.get('avg_bitrate'), 1000)
height = int_or_none(source.get('height')) height = int_or_none(source.get('height'))
width = int_or_none(source.get('width'))
f = { f = {
'tbr': tbr, 'tbr': tbr,
'width': int_or_none(source.get('width')),
'height': height,
'filesize': int_or_none(source.get('size')), 'filesize': int_or_none(source.get('size')),
'container': source.get('container'), 'container': container,
'vcodec': source.get('codec'), 'ext': container.lower(),
'ext': source.get('container').lower(),
} }
if width == 0 and height == 0:
f.update({
'vcodec': 'none',
})
else:
f.update({
'width': width,
'height': height,
'vcodec': source.get('codec'),
})
def build_format_id(kind): def build_format_id(kind):
format_id = kind format_id = kind

View File

@ -16,7 +16,7 @@ from ..utils import (
class CamdemyIE(InfoExtractor): class CamdemyIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# single file # single file
'url': 'http://www.camdemy.com/media/5181/', 'url': 'http://www.camdemy.com/media/5181/',
@ -104,7 +104,7 @@ class CamdemyIE(InfoExtractor):
class CamdemyFolderIE(InfoExtractor): class CamdemyFolderIE(InfoExtractor):
_VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)' _VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# links with trailing slash # links with trailing slash
'url': 'http://www.camdemy.com/folder/450', 'url': 'http://www.camdemy.com/folder/450',

View File

@ -11,7 +11,7 @@ from ..utils import (
class CBSNewsIE(ThePlatformIE): class CBSNewsIE(ThePlatformIE):
IE_DESC = 'CBS News' IE_DESC = 'CBS News'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
_TESTS = [ _TESTS = [
{ {
@ -78,7 +78,7 @@ class CBSNewsIE(ThePlatformIE):
pid = item.get('media' + format_id) pid = item.get('media' + format_id)
if not pid: if not pid:
continue continue
release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid) tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
formats.extend(tp_formats) formats.extend(tp_formats)
subtitles = self._merge_subtitles(subtitles, tp_subtitles) subtitles = self._merge_subtitles(subtitles, tp_subtitles)
@ -96,7 +96,7 @@ class CBSNewsIE(ThePlatformIE):
class CBSNewsLiveVideoIE(InfoExtractor): class CBSNewsLiveVideoIE(InfoExtractor):
IE_DESC = 'CBS News Live Videos' IE_DESC = 'CBS News Live Videos'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)' _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
_TEST = { _TEST = {
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class CBSSportsIE(InfoExtractor): class CBSSportsIE(InfoExtractor):
_VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
_TEST = { _TEST = {
'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', 'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',

96
youtube_dl/extractor/cda.py Executable file
View File

@ -0,0 +1,96 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
decode_packed_codes,
ExtractorError,
parse_duration
)
class CDAIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
_TESTS = [{
'url': 'http://www.cda.pl/video/5749950c',
'md5': '6f844bf51b15f31fae165365707ae970',
'info_dict': {
'id': '5749950c',
'ext': 'mp4',
'height': 720,
'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
'duration': 39
}
}, {
'url': 'http://www.cda.pl/video/57413289',
'md5': 'a88828770a8310fc00be6c95faf7f4d5',
'info_dict': {
'id': '57413289',
'ext': 'mp4',
'title': 'Lądowanie na lotnisku na Maderze',
'duration': 137
}
}, {
'url': 'http://ebd.cda.pl/0x0/5749950c',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True)
title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
formats = []
info_dict = {
'id': video_id,
'title': title,
'formats': formats,
'duration': None,
}
def extract_format(page, version):
unpacked = decode_packed_codes(page)
format_url = self._search_regex(
r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False)
if not format_url:
return
f = {
'url': format_url,
}
m = re.search(
r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
page)
if m:
f.update({
'format_id': m.group('format_id'),
'height': int(m.group('height')),
})
info_dict['formats'].append(f)
if not info_dict['duration']:
info_dict['duration'] = parse_duration(self._search_regex(
r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False))
extract_format(webpage, 'default')
for href, resolution in re.findall(
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
webpage):
webpage = self._download_webpage(
href, video_id, 'Downloading %s version information' % resolution, fatal=False)
if not webpage:
# Manually report warning because empty page is returned when
# invalid version is requested.
self.report_warning('Unable to download %s version information' % resolution)
continue
extract_format(webpage, resolution)
self._sort_formats(formats)
return info_dict
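A quick end-to-end check of the new extractor (a sketch: assumes this build of youtube-dl is importable and cda.pl is reachable; expected values taken from the first test case above):

    import youtube_dl

    with youtube_dl.YoutubeDL({'quiet': True}) as ydl:
        info = ydl.extract_info('http://www.cda.pl/video/5749950c', download=False)
    print(info['id'], info['duration'])  # 5749950c 39
    print([f.get('height') for f in info['formats']])  # includes 720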


@ -129,7 +129,8 @@ class CeskaTelevizeIE(InfoExtractor):
formats = [] formats = []
for format_id, stream_url in item['streamUrls'].items(): for format_id, stream_url in item['streamUrls'].items():
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native')) stream_url, playlist_id, 'mp4',
entry_protocol='m3u8_native', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
item_id = item.get('id') or item['assetId'] item_id = item.get('id') or item['assetId']


@ -21,6 +21,10 @@ class CinemassacreIE(InfoExtractor):
'title': '“Angry Video Game Nerd: The Movie” Trailer', 'title': '“Angry Video Game Nerd: The Movie” Trailer',
'description': 'md5:fb87405fcb42a331742a0dce2708560b', 'description': 'md5:fb87405fcb42a331742a0dce2708560b',
}, },
'params': {
# m3u8 download
'skip_download': True,
},
}, },
{ {
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
@ -31,14 +35,18 @@ class CinemassacreIE(InfoExtractor):
'upload_date': '20131002', 'upload_date': '20131002',
'title': 'The Mummy’s Hand (1940)', 'title': 'The Mummy’s Hand (1940)',
}, },
'params': {
# m3u8 download
'skip_download': True,
},
}, },
{ {
# Youtube embedded video # Youtube embedded video
'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/', 'url': 'http://cinemassacre.com/2006/12/07/chronologically-confused-about-bad-movie-and-video-game-sequel-titles/',
'md5': 'df4cf8a1dcedaec79a73d96d83b99023', 'md5': 'ec9838a5520ef5409b3e4e42fcb0a3b9',
'info_dict': { 'info_dict': {
'id': 'OEVzPCY2T-g', 'id': 'OEVzPCY2T-g',
'ext': 'mp4', 'ext': 'webm',
'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles', 'title': 'AVGN: Chronologically Confused about Bad Movie and Video Game Sequel Titles',
'upload_date': '20061207', 'upload_date': '20061207',
'uploader': 'Cinemassacre', 'uploader': 'Cinemassacre',
@ -49,12 +57,12 @@ class CinemassacreIE(InfoExtractor):
{ {
# Youtube embedded video # Youtube embedded video
'url': 'http://cinemassacre.com/2006/09/01/mckids/', 'url': 'http://cinemassacre.com/2006/09/01/mckids/',
'md5': '6eb30961fa795fedc750eac4881ad2e1', 'md5': '7393c4e0f54602ad110c793eb7a6513a',
'info_dict': { 'info_dict': {
'id': 'FnxsNhuikpo', 'id': 'FnxsNhuikpo',
'ext': 'mp4', 'ext': 'webm',
'upload_date': '20060901', 'upload_date': '20060901',
'uploader': 'Cinemassacre Extras', 'uploader': 'Cinemassacre Extra',
'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53', 'description': 'md5:de9b751efa9e45fbaafd9c8a1123ed53',
'uploader_id': 'Cinemassacre', 'uploader_id': 'Cinemassacre',
'title': 'AVGN: McKids', 'title': 'AVGN: McKids',
@ -69,7 +77,11 @@ class CinemassacreIE(InfoExtractor):
'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!', 'description': 'Let’s Play Mario Kart 64 !! Mario Kart 64 is a classic go-kart racing game released for the Nintendo 64 (N64). Today James & Mike do 4 player Battle Mode with Kyle and Bootsy!',
'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays', 'title': 'Mario Kart 64 (Nintendo 64) James & Mike Mondays',
'upload_date': '20150525', 'upload_date': '20150525',
} },
'params': {
# m3u8 download
'skip_download': True,
},
} }
] ]


@ -19,7 +19,7 @@ def _decode(s):
class CliphunterIE(InfoExtractor): class CliphunterIE(InfoExtractor):
IE_NAME = 'cliphunter' IE_NAME = 'cliphunter'
_VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/ _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
(?P<id>[0-9]+)/ (?P<id>[0-9]+)/
(?P<seo>.+?)(?:$|[#\?]) (?P<seo>.+?)(?:$|[#\?])
''' '''


@ -8,7 +8,7 @@ from ..utils import (
class ClipsyndicateIE(InfoExtractor): class ClipsyndicateIE(InfoExtractor):
_VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',


@ -12,7 +12,7 @@ from ..utils import (
class ClubicIE(InfoExtractor): class ClubicIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', 'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',


@ -51,9 +51,7 @@ class CNETIE(ThePlatformIE):
uploader = None uploader = None
uploader_id = None uploader_id = None
mpx_account = data['config']['uvpConfig']['default']['mpx_account'] metadata = self.get_metadata('kYEXFC/%s' % list(vdata['files'].values())[0], video_id)
metadata = self.get_metadata('%s/%s' % (mpx_account, list(vdata['files'].values())[0]), video_id)
description = vdata.get('description') or metadata.get('description') description = vdata.get('description') or metadata.get('description')
duration = int_or_none(vdata.get('duration')) or metadata.get('duration') duration = int_or_none(vdata.get('duration')) or metadata.get('duration')
@ -62,7 +60,7 @@ class CNETIE(ThePlatformIE):
for (fkey, vid) in vdata['files'].items(): for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']: if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue continue
release_url = 'http://link.theplatform.com/s/%s/%s?format=SMIL&mbr=true' % (mpx_account, vid) release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
if fkey == 'hds': if fkey == 'hds':
release_url += '&manifest=f4m' release_url += '&manifest=f4m'
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey) tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)


@ -11,7 +11,7 @@ from ..utils import (
class ComCarCoffIE(InfoExtractor): class ComCarCoffIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)' _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
_TESTS = [{ _TESTS = [{
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/', 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
'info_dict': { 'info_dict': {


@ -15,13 +15,14 @@ import math
from ..compat import ( from ..compat import (
compat_cookiejar, compat_cookiejar,
compat_cookies, compat_cookies,
compat_etree_fromstring,
compat_getpass, compat_getpass,
compat_http_client, compat_http_client,
compat_os_name,
compat_str,
compat_urllib_error, compat_urllib_error,
compat_urllib_parse, compat_urllib_parse,
compat_urlparse, compat_urlparse,
compat_str,
compat_etree_fromstring,
) )
from ..utils import ( from ..utils import (
NO_DEFAULT, NO_DEFAULT,
@ -47,6 +48,7 @@ from ..utils import (
determine_protocol, determine_protocol,
parse_duration, parse_duration,
mimetype2ext, mimetype2ext,
update_url_query,
) )
@ -104,7 +106,7 @@ class InfoExtractor(object):
* protocol The protocol that will be used for the actual * protocol The protocol that will be used for the actual
download, lower-case. download, lower-case.
"http", "https", "rtsp", "rtmp", "rtmpe", "http", "https", "rtsp", "rtmp", "rtmpe",
"m3u8", or "m3u8_native". "m3u8", "m3u8_native" or "http_dash_segments".
* preference Order number of this format. If this field is * preference Order number of this format. If this field is
present and not None, the formats get sorted present and not None, the formats get sorted
by this field, regardless of all other values. by this field, regardless of all other values.
@ -344,7 +346,7 @@ class InfoExtractor(object):
def IE_NAME(self): def IE_NAME(self):
return compat_str(type(self).__name__[:-2]) return compat_str(type(self).__name__[:-2])
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None):
""" Returns the response handle """ """ Returns the response handle """
if note is None: if note is None:
self.report_download_webpage(video_id) self.report_download_webpage(video_id)
@ -353,6 +355,12 @@ class InfoExtractor(object):
self.to_screen('%s' % (note,)) self.to_screen('%s' % (note,))
else: else:
self.to_screen('%s: %s' % (video_id, note)) self.to_screen('%s: %s' % (video_id, note))
# data, headers and query params will be ignored for `Request` objects
if isinstance(url_or_request, compat_str):
if query:
url_or_request = update_url_query(url_or_request, query)
if data or headers:
url_or_request = sanitized_Request(url_or_request, data, headers or {})
try: try:
return self._downloader.urlopen(url_or_request) return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@ -368,13 +376,13 @@ class InfoExtractor(object):
self._downloader.report_warning(errmsg) self._downloader.report_warning(errmsg)
return False return False
def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None): def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None):
""" Returns a tuple (page content as string, URL handle) """ """ Returns a tuple (page content as string, URL handle) """
# Strip hashes from the URL (#1038) # Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)): if isinstance(url_or_request, (compat_str, str)):
url_or_request = url_or_request.partition('#')[0] url_or_request = url_or_request.partition('#')[0]
urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal) urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
if urlh is False: if urlh is False:
assert not fatal assert not fatal
return False return False
@ -427,7 +435,7 @@ class InfoExtractor(object):
self.to_screen('Saving request to ' + filename) self.to_screen('Saving request to ' + filename)
# Working around MAX_PATH limitation on Windows (see # Working around MAX_PATH limitation on Windows (see
# http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx) # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
if os.name == 'nt': if compat_os_name == 'nt':
absfilepath = os.path.abspath(filename) absfilepath = os.path.abspath(filename)
if len(absfilepath) > 259: if len(absfilepath) > 259:
filename = '\\\\?\\' + absfilepath filename = '\\\\?\\' + absfilepath
@ -461,13 +469,13 @@ class InfoExtractor(object):
return content return content
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None):
""" Returns the data of the page as a string """ """ Returns the data of the page as a string """
success = False success = False
try_count = 0 try_count = 0
while success is False: while success is False:
try: try:
res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding) res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query)
success = True success = True
except compat_http_client.IncompleteRead as e: except compat_http_client.IncompleteRead as e:
try_count += 1 try_count += 1
@ -482,10 +490,10 @@ class InfoExtractor(object):
def _download_xml(self, url_or_request, video_id, def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML', note='Downloading XML', errnote='Unable to download XML',
transform_source=None, fatal=True, encoding=None): transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None):
"""Return the xml as an xml.etree.ElementTree.Element""" """Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage( xml_string = self._download_webpage(
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding) url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
if xml_string is False: if xml_string is False:
return xml_string return xml_string
if transform_source: if transform_source:
@ -496,10 +504,10 @@ class InfoExtractor(object):
note='Downloading JSON metadata', note='Downloading JSON metadata',
errnote='Unable to download JSON metadata', errnote='Unable to download JSON metadata',
transform_source=None, transform_source=None,
fatal=True, encoding=None): fatal=True, encoding=None, data=None, headers=None, query=None):
json_string = self._download_webpage( json_string = self._download_webpage(
url_or_request, video_id, note, errnote, fatal=fatal, url_or_request, video_id, note, errnote, fatal=fatal,
encoding=encoding) encoding=encoding, data=data, headers=headers, query=query)
if (not fatal) and json_string is False: if (not fatal) and json_string is False:
return None return None
return self._parse_json( return self._parse_json(
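With these signatures an extractor can write self._download_json(api_url, video_id, query={'id': video_id}, headers={...}) instead of hand-assembling the URL and request; per the comment above, the kwargs are ignored when a Request object is passed in. Under the hood it is roughly the two utils below (a sketch; the API URL is made up):

    from youtube_dl.utils import sanitized_Request, update_url_query

    url = update_url_query('http://api.example.com/v1/clip', {'id': '123'})
    req = sanitized_Request(url, None, {'X-Requested-With': 'XMLHttpRequest'})
    print(req.get_full_url())  # http://api.example.com/v1/clip?id=123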
@ -596,7 +604,7 @@ class InfoExtractor(object):
if mobj: if mobj:
break break
if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty(): if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
_name = '\033[0;34m%s\033[0m' % name _name = '\033[0;34m%s\033[0m' % name
else: else:
_name = name _name = name
@ -854,6 +862,7 @@ class InfoExtractor(object):
proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1 proto_preference = 0 if determine_protocol(f) in ['http', 'https'] else -0.1
if f.get('vcodec') == 'none': # audio only if f.get('vcodec') == 'none': # audio only
preference -= 50
if self._downloader.params.get('prefer_free_formats'): if self._downloader.params.get('prefer_free_formats'):
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus'] ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
else: else:
@ -864,6 +873,8 @@ class InfoExtractor(object):
except ValueError: except ValueError:
audio_ext_preference = -1 audio_ext_preference = -1
else: else:
if f.get('acodec') == 'none': # video only
preference -= 40
if self._downloader.params.get('prefer_free_formats'): if self._downloader.params.get('prefer_free_formats'):
ORDER = ['flv', 'mp4', 'webm'] ORDER = ['flv', 'mp4', 'webm']
else: else:
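The two new penalties mean that, all else being equal, a muxed audio+video format now outranks a video-only one, which in turn outranks an audio-only one. A standalone sketch of just that ordering (the real _sort_formats weighs many more fields):

    def av_penalty(f):
        if f.get('vcodec') == 'none':
            return -50  # audio only
        if f.get('acodec') == 'none':
            return -40  # video only
        return 0

    formats = [{'name': 'muxed'},
               {'name': 'audio-only', 'vcodec': 'none'},
               {'name': 'video-only', 'acodec': 'none'}]
    print([f['name'] for f in sorted(formats, key=av_penalty)])
    # ['audio-only', 'video-only', 'muxed'] -- worst first, best last, as youtube-dl sorts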
@ -965,6 +976,13 @@ class InfoExtractor(object):
if manifest is False: if manifest is False:
return [] return []
return self._parse_f4m_formats(
manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
transform_source=transform_source, fatal=fatal)
def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
transform_source=lambda s: fix_xml_ampersands(s).strip(),
fatal=True):
formats = [] formats = []
manifest_version = '1.0' manifest_version = '1.0'
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media') media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
@ -990,7 +1008,8 @@ class InfoExtractor(object):
# bitrate in f4m downloader # bitrate in f4m downloader
if determine_ext(manifest_url) == 'f4m': if determine_ext(manifest_url) == 'f4m':
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
manifest_url, video_id, preference, f4m_id, fatal=fatal)) manifest_url, video_id, preference=preference, f4m_id=f4m_id,
transform_source=transform_source, fatal=fatal))
continue continue
tbr = int_or_none(media_el.attrib.get('bitrate')) tbr = int_or_none(media_el.attrib.get('bitrate'))
formats.append({ formats.append({
@ -1139,8 +1158,8 @@ class InfoExtractor(object):
out.append('{%s}%s' % (namespace, c)) out.append('{%s}%s' % (namespace, c))
return '/'.join(out) return '/'.join(out)
def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None): def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
smil = self._download_smil(smil_url, video_id, fatal=fatal) smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
if smil is False: if smil is False:
assert not fatal assert not fatal
@ -1157,10 +1176,10 @@ class InfoExtractor(object):
return {} return {}
return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
def _download_smil(self, smil_url, video_id, fatal=True): def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
return self._download_xml( return self._download_xml(
smil_url, video_id, 'Downloading SMIL file', smil_url, video_id, 'Downloading SMIL file',
'Unable to download SMIL file', fatal=fatal) 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
def _parse_smil(self, smil, smil_url, video_id, f4m_params=None): def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
namespace = self._parse_smil_namespace(smil) namespace = self._parse_smil_namespace(smil)
@ -1446,8 +1465,9 @@ class InfoExtractor(object):
continue continue
representation_attrib = adaptation_set.attrib.copy() representation_attrib = adaptation_set.attrib.copy()
representation_attrib.update(representation.attrib) representation_attrib.update(representation.attrib)
mime_type = representation_attrib.get('mimeType') # According to page 41 of ISO/IEC 23009-1:2014, @mimeType is mandatory
content_type = mime_type.split('/')[0] if mime_type else representation_attrib.get('contentType') mime_type = representation_attrib['mimeType']
content_type = mime_type.split('/')[0]
if content_type == 'text': if content_type == 'text':
# TODO implement WebVTT downloading # TODO implement WebVTT downloading
pass pass
@ -1470,6 +1490,7 @@ class InfoExtractor(object):
f = { f = {
'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
'url': base_url, 'url': base_url,
'ext': mimetype2ext(mime_type),
'width': int_or_none(representation_attrib.get('width')), 'width': int_or_none(representation_attrib.get('width')),
'height': int_or_none(representation_attrib.get('height')), 'height': int_or_none(representation_attrib.get('height')),
'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000), 'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
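Treating @mimeType as mandatory also lets the parser derive the container via mimetype2ext (imported in the hunk above). A sketch of the mapping:

    from youtube_dl.utils import mimetype2ext

    print(mimetype2ext('video/mp4'))   # mp4
    print(mimetype2ext('audio/mp4'))   # m4a (special-cased)
    print(mimetype2ext('video/webm'))  # webm (generic subtype fallback)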


@ -0,0 +1,36 @@
from __future__ import unicode_literals
import os
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import url_basename
class RtmpIE(InfoExtractor):
IE_DESC = False # Do not list
_VALID_URL = r'(?i)rtmp[est]?://.+'
_TESTS = [{
'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
'only_matching': True,
}, {
'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
return {
'id': video_id,
'title': title,
'formats': [{
'url': url,
'ext': 'flv',
'format_id': compat_urlparse.urlparse(url).scheme,
}],
}
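The extractor does no network I/O -- both id and title fall out of the URL itself -- so it can be exercised offline (a sketch, assuming this build is importable):

    import youtube_dl

    with youtube_dl.YoutubeDL({'quiet': True}) as ydl:
        info = ydl.extract_info('rtmp://edge.live.hitbox.tv/live/dimak', download=False)
    print(info['id'], info['ext'], info['formats'][0]['format_id'])  # dimak flv rtmp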


@ -45,7 +45,7 @@ class CondeNastIE(InfoExtractor):
'wmagazine': 'W Magazine', 'wmagazine': 'W Magazine',
} }
_VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) _VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys()) EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())


@ -54,7 +54,7 @@ class CrunchyrollBaseIE(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): def _download_webpage(self, url_or_request, *args, **kwargs):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else sanitized_Request(url_or_request)) else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues # Accept-Language must be set explicitly to accept any language to avoid issues
@ -65,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor):
# Crunchyroll to not work in georestriction cases in some browsers that don't place # Crunchyroll to not work in georestriction cases in some browsers that don't place
# the locale lang first in header. However allowing any language seems to workaround the issue. # the locale lang first in header. However allowing any language seems to workaround the issue.
request.add_header('Accept-Language', '*') request.add_header('Accept-Language', '*')
return super(CrunchyrollBaseIE, self)._download_webpage( return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
request, video_id, note, errnote, fatal, tries, timeout, encoding)
@staticmethod @staticmethod
def _add_skip_wall(url): def _add_skip_wall(url):
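Forwarding *args/**kwargs is what keeps this override alive when the base signature grows, as it does elsewhere in this very commit (data/headers/query). The pattern in miniature (generic sketch, not youtube-dl code):

    class Base(object):
        def fetch(self, url, note=None, fatal=True, headers=None):  # 'headers' added later
            return url, note, fatal, headers

    class Wrapper(Base):
        def fetch(self, url, *args, **kwargs):
            url += '#prepared'  # stands in for wrapping url in a Request
            return super(Wrapper, self).fetch(url, *args, **kwargs)

    print(Wrapper().fetch('http://example.com', headers={'Accept-Language': '*'}))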


@ -15,7 +15,7 @@ from .senateisvp import SenateISVPIE
class CSpanIE(InfoExtractor): class CSpanIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
IE_DESC = 'C-SPAN' IE_DESC = 'C-SPAN'
_TESTS = [{ _TESTS = [{
'url': 'http://www.c-span.org/video/?313572-1/HolderonV', 'url': 'http://www.c-span.org/video/?313572-1/HolderonV',


@ -8,7 +8,7 @@ from ..utils import parse_iso8601, ExtractorError
class CtsNewsIE(InfoExtractor): class CtsNewsIE(InfoExtractor):
IE_DESC = '華視新聞' IE_DESC = '華視新聞'
# https connection failed (Connection reset) # https connection failed (Connection reset)
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html', 'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
'md5': 'a9875cb790252b08431186d741beaabe', 'md5': 'a9875cb790252b08431186d741beaabe',


@ -6,7 +6,7 @@ from ..compat import compat_str
class DctpTvIE(InfoExtractor): class DctpTvIE(InfoExtractor):
_VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$' _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
_TEST = { _TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'info_dict': { 'info_dict': {


@ -5,7 +5,7 @@ from .common import InfoExtractor
class DefenseGouvFrIE(InfoExtractor): class DefenseGouvFrIE(InfoExtractor):
IE_NAME = 'defense.gouv.fr' IE_NAME = 'defense.gouv.fr'
_VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
_TEST = { _TEST = {
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',


@ -9,7 +9,7 @@ from ..compat import compat_str
class DiscoveryIE(InfoExtractor): class DiscoveryIE(InfoExtractor):
_VALID_URL = r'''(?x)http://(?:www\.)?(?: _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
discovery| discovery|
investigationdiscovery| investigationdiscovery|
discoverylife| discoverylife|


@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring)
class DouyuTVIE(InfoExtractor): class DouyuTVIE(InfoExtractor):
IE_DESC = '斗鱼' IE_DESC = '斗鱼'
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?douyu(?:tv)?\.com/(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.douyutv.com/iseven', 'url': 'http://www.douyutv.com/iseven',
'info_dict': { 'info_dict': {
@ -18,7 +18,7 @@ class DouyuTVIE(InfoExtractor):
'display_id': 'iseven', 'display_id': 'iseven',
'ext': 'flv', 'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:c93d6692dde6fe33809a46edcbecca44', 'description': 'md5:f34981259a03e980a3c6404190a3ed61',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅', 'uploader': '7师傅',
'uploader_id': '431925', 'uploader_id': '431925',
@ -26,7 +26,7 @@ class DouyuTVIE(InfoExtractor):
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} },
}, { }, {
'url': 'http://www.douyutv.com/85982', 'url': 'http://www.douyutv.com/85982',
'info_dict': { 'info_dict': {
@ -42,7 +42,27 @@ class DouyuTVIE(InfoExtractor):
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} },
'skip': 'Room not found',
}, {
'url': 'http://www.douyutv.com/17732',
'info_dict': {
'id': '17732',
'display_id': '17732',
'ext': 'flv',
'title': 're:^清晨醒脑T-ara根本停不下来 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
'description': 'md5:f34981259a03e980a3c6404190a3ed61',
'thumbnail': 're:^https?://.*\.jpg$',
'uploader': '7师傅',
'uploader_id': '431925',
'is_live': True,
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.douyu.com/xiaocang',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):


@ -10,7 +10,7 @@ from ..utils import int_or_none
class DPlayIE(InfoExtractor): class DPlayIE(InfoExtractor):
_VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/', 'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',


@ -7,7 +7,7 @@ from .zdf import ZDFIE
class DreiSatIE(ZDFIE): class DreiSatIE(ZDFIE):
IE_NAME = '3sat' IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',


@ -15,7 +15,7 @@ class DVTVIE(InfoExtractor):
IE_NAME = 'dvtv' IE_NAME = 'dvtv'
IE_DESC = 'http://video.aktualne.cz/' IE_DESC = 'http://video.aktualne.cz/'
_VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})' _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
_TESTS = [{ _TESTS = [{
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',


@ -0,0 +1,85 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import int_or_none
from ..compat import compat_urlparse
class DWIE(InfoExtractor):
IE_NAME = 'dw'
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+av-(?P<id>\d+)'
_TESTS = [{
# video
'url': 'http://www.dw.com/en/intelligent-light/av-19112290',
'md5': '7372046e1815c5a534b43f3c3c36e6e9',
'info_dict': {
'id': '19112290',
'ext': 'mp4',
'title': 'Intelligent light',
'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
'upload_date': '20160311',
}
}, {
# audio
'url': 'http://www.dw.com/en/worldlink-my-business/av-19111941',
'md5': '2814c9a1321c3a51f8a7aeb067a360dd',
'info_dict': {
'id': '19111941',
'ext': 'mp3',
'title': 'WorldLink: My business',
'description': 'md5:bc9ca6e4e063361e21c920c53af12405',
'upload_date': '20160311',
}
}]
def _real_extract(self, url):
media_id = self._match_id(url)
webpage = self._download_webpage(url, media_id)
hidden_inputs = self._hidden_inputs(webpage)
title = hidden_inputs['media_title']
formats = []
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
formats = self._extract_smil_formats(
'http://www.dw.com/smil/v-%s' % media_id, media_id,
transform_source=lambda s: s.replace(
'rtmp://tv-od.dw.de/flash/',
'http://tv-download.dw.de/dwtv_video/flv/'))
else:
formats = [{'url': hidden_inputs['file_name']}]
return {
'id': media_id,
'title': title,
'description': self._og_search_description(webpage),
'thumbnail': hidden_inputs.get('preview_image'),
'duration': int_or_none(hidden_inputs.get('file_duration')),
'upload_date': hidden_inputs.get('display_date'),
'formats': formats,
}
class DWArticleIE(InfoExtractor):
IE_NAME = 'dw:article'
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
_TEST = {
'url': 'http://www.dw.com/en/no-hope-limited-options-for-refugees-in-idomeni/a-19111009',
'md5': '8ca657f9d068bbef74d6fc38b97fc869',
'info_dict': {
'id': '19105868',
'ext': 'mp4',
'title': 'The harsh life of refugees in Idomeni',
'description': 'md5:196015cc7e48ebf474db9399420043c7',
'upload_date': '20160310',
}
}
def _real_extract(self, url):
article_id = self._match_id(url)
webpage = self._download_webpage(url, article_id)
hidden_inputs = self._hidden_inputs(webpage)
media_id = hidden_inputs['media_id']
media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url')
media_url = compat_urlparse.urljoin(url, media_path)
return self.url_result(media_url, 'DW', media_id)
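The transform_source hook in DWIE rewrites RTMP stream URLs inside the SMIL document into progressive-download URLs before parsing; the substitution in isolation (sketch, with a made-up path):

    src = 'rtmp://tv-od.dw.de/flash/me/2016/03/clip.mp4'
    print(src.replace('rtmp://tv-od.dw.de/flash/',
                      'http://tv-download.dw.de/dwtv_video/flv/'))
    # http://tv-download.dw.de/dwtv_video/flv/me/2016/03/clip.mp4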


@ -7,7 +7,7 @@ from .common import InfoExtractor
class EchoMskIE(InfoExtractor): class EchoMskIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.echo.msk.ru/sounds/1464134.html', 'url': 'http://www.echo.msk.ru/sounds/1464134.html',
'md5': '2e44b3b78daff5b458e4dbc37f191f7c', 'md5': '2e44b3b78daff5b458e4dbc37f191f7c',


@ -9,7 +9,7 @@ class ElPaisIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])' _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])'
IE_DESC = 'El País' IE_DESC = 'El País'
_TEST = { _TESTS = [{
'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html', 'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html',
'md5': '98406f301f19562170ec071b83433d55', 'md5': '98406f301f19562170ec071b83433d55',
'info_dict': { 'info_dict': {
@ -19,30 +19,41 @@ class ElPaisIE(InfoExtractor):
'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.', 'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.',
'upload_date': '20140206', 'upload_date': '20140206',
} }
} }, {
'url': 'http://elcomidista.elpais.com/elcomidista/2016/02/24/articulo/1456340311_668921.html#?id_externo_nwl=newsletter_diaria20160303t',
'md5': '3bd5b09509f3519d7d9e763179b013de',
'info_dict': {
'id': '1456340311_668921',
'ext': 'mp4',
'title': 'Cómo hacer el mejor café con cafetera italiana',
'description': 'Que sí, que las cápsulas son cómodas. Pero si le pides algo más a la vida, quizá deberías aprender a usar bien la cafetera italiana. No tienes más que ver este vídeo y seguir sus siete normas básicas.',
'upload_date': '20160303',
}
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
prefix = self._html_search_regex( prefix = self._html_search_regex(
r'var url_cache = "([^"]+)";', webpage, 'URL prefix') r'var\s+url_cache\s*=\s*"([^"]+)";', webpage, 'URL prefix')
video_suffix = self._search_regex( video_suffix = self._search_regex(
r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL') r"(?:URLMediaFile|urlVideo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'", webpage, 'video URL')
video_url = prefix + video_suffix video_url = prefix + video_suffix
thumbnail_suffix = self._search_regex( thumbnail_suffix = self._search_regex(
r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL', r"(?:URLMediaStill|urlFotogramaFijo_\d+)\s*=\s*url_cache\s*\+\s*'([^']+)'",
fatal=False) webpage, 'thumbnail URL', fatal=False)
thumbnail = ( thumbnail = (
None if thumbnail_suffix is None None if thumbnail_suffix is None
else prefix + thumbnail_suffix) else prefix + thumbnail_suffix)
title = self._html_search_regex( title = self._html_search_regex(
'<h2 class="entry-header entry-title.*?>(.*?)</h2>', (r"tituloVideo\s*=\s*'([^']+)'",
r'<h2 class="entry-header entry-title.*?>(.*?)</h2>'),
webpage, 'title') webpage, 'title')
date_str = self._search_regex( upload_date = unified_strdate(self._search_regex(
r'<p class="date-header date-int updated"\s+title="([^"]+)">', r'<p class="date-header date-int updated"\s+title="([^"]+)">',
webpage, 'upload date', fatal=False) webpage, 'upload date', default=None) or self._html_search_meta(
upload_date = (None if date_str is None else unified_strdate(date_str)) 'datePublished', webpage, 'timestamp'))
return { return {
'id': video_id, 'id': video_id,


@ -1,21 +1,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import (
url_basename,
)
class EngadgetIE(InfoExtractor): class EngadgetIE(InfoExtractor):
_VALID_URL = r'''(?x)https?://www.engadget.com/ _VALID_URL = r'https?://www.engadget.com/video/(?P<id>\d+)'
(?:video(?:/5min)?/(?P<id>\d+)|
[\d/]+/.*?)
'''
_TEST = { _TEST = {
'url': 'http://www.engadget.com/video/5min/518153925/', 'url': 'http://www.engadget.com/video/518153925/',
'md5': 'c6820d4828a5064447a4d9fc73f312c9', 'md5': 'c6820d4828a5064447a4d9fc73f312c9',
'info_dict': { 'info_dict': {
'id': '518153925', 'id': '518153925',
@ -27,15 +19,4 @@ class EngadgetIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
return self.url_result('5min:%s' % video_id)
if video_id is not None:
return self.url_result('5min:%s' % video_id)
else:
title = url_basename(url)
webpage = self._download_webpage(url, title)
ids = re.findall(r'<iframe[^>]+?playList=(\d+)', webpage)
return {
'_type': 'playlist',
'title': title,
'entries': [self.url_result('5min:%s' % vid) for vid in ids]
}


@ -8,7 +8,7 @@ from .common import InfoExtractor
class ExfmIE(InfoExtractor): class ExfmIE(InfoExtractor):
IE_NAME = 'exfm' IE_NAME = 'exfm'
IE_DESC = 'ex.fm' IE_DESC = 'ex.fm'
_VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
_SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
_TESTS = [ _TESTS = [
{ {


@ -37,7 +37,9 @@ class FacebookIE(InfoExtractor):
video/embed| video/embed|
story\.php story\.php
)\?(?:.*?)(?:v|video_id|story_fbid)=| )\?(?:.*?)(?:v|video_id|story_fbid)=|
[^/]+/videos/(?:[^/]+/)? [^/]+/videos/(?:[^/]+/)?|
[^/]+/posts/|
groups/[^/]+/permalink/
)| )|
facebook: facebook:
) )
@ -50,6 +52,8 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
'md5': '6a40d33c0eccbb1af76cf0485a052659', 'md5': '6a40d33c0eccbb1af76cf0485a052659',
@ -81,6 +85,33 @@ class FacebookIE(InfoExtractor):
'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...', 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
'uploader': 'Demy de Zeeuw', 'uploader': 'Demy de Zeeuw',
}, },
}, {
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
'info_dict': {
'id': '544765982287235',
'ext': 'mp4',
'title': '"What are you doing running in the snow?"',
'uploader': 'FailArmy',
}
}, {
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
'info_dict': {
'id': '1035862816472149',
'ext': 'mp4',
'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
'uploader': 'S. Saint',
},
}, {
'note': 'swf params escaped',
'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
'md5': '97ba073838964d12c70566e0085c2b91',
'info_dict': {
'id': '10153664894881749',
'ext': 'mp4',
'title': 'Facebook video #10153664894881749',
},
}, { }, {
'url': 'https://www.facebook.com/video.php?v=10204634152394104', 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
'only_matching': True, 'only_matching': True,
@ -94,7 +125,7 @@ class FacebookIE(InfoExtractor):
'url': 'facebook:544765982287235', 'url': 'facebook:544765982287235',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903', 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
'only_matching': True, 'only_matching': True,
}] }]
@ -164,19 +195,19 @@ class FacebookIE(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
def _real_extract(self, url): def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
video_id = self._match_id(url) req = sanitized_Request(url)
req = sanitized_Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
req.add_header('User-Agent', self._CHROME_USER_AGENT) req.add_header('User-Agent', self._CHROME_USER_AGENT)
webpage = self._download_webpage(req, video_id) webpage = self._download_webpage(req, video_id)
video_data = None video_data = None
BEFORE = '{swf.addParam(param[0], param[1]);});\n' BEFORE = '{swf.addParam(param[0], param[1]);});'
AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});' AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage) m = re.search(re.escape(BEFORE) + '(?:\n|\\\\n)(.*?)' + re.escape(AFTER), webpage)
if m: if m:
data = dict(json.loads(m.group(1))) swf_params = m.group(1).replace('\\\\', '\\').replace('\\"', '"')
data = dict(json.loads(swf_params))
params_raw = compat_urllib_parse_unquote(data['params']) params_raw = compat_urllib_parse_unquote(data['params'])
video_data = json.loads(params_raw)['video_data'] video_data = json.loads(params_raw)['video_data']
@ -189,13 +220,15 @@ class FacebookIE(InfoExtractor):
if not video_data: if not video_data:
server_js_data = self._parse_json(self._search_regex( server_js_data = self._parse_json(self._search_regex(
r'handleServerJS\(({.+})\);', webpage, 'server js data'), video_id) r'handleServerJS\(({.+})\);', webpage, 'server js data', default='{}'), video_id)
for item in server_js_data.get('instances', []): for item in server_js_data.get('instances', []):
if item[1][0] == 'VideoConfig': if item[1][0] == 'VideoConfig':
video_data = video_data_list2dict(item[2][0]['videoData']) video_data = video_data_list2dict(item[2][0]['videoData'])
break break
if not video_data: if not video_data:
if not fatal_if_no_video:
return webpage, False
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage) m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
if m_msg is not None: if m_msg is not None:
raise ExtractorError( raise ExtractorError(
@ -241,39 +274,36 @@ class FacebookIE(InfoExtractor):
video_title = 'Facebook video #%s' % video_id video_title = 'Facebook video #%s' % video_id
uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage)) uploader = clean_html(get_element_by_id('fbPhotoPageAuthorName', webpage))
return { info_dict = {
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'formats': formats, 'formats': formats,
'uploader': uploader, 'uploader': uploader,
} }
return webpage, info_dict
class FacebookPostIE(InfoExtractor):
IE_NAME = 'facebook:post'
_VALID_URL = r'https?://(?:\w+\.)?facebook\.com/[^/]+/posts/(?P<id>\d+)'
_TEST = {
'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
'info_dict': {
'id': '544765982287235',
'ext': 'mp4',
'title': '"What are you doing running in the snow?"',
'uploader': 'FailArmy',
}
}
def _real_extract(self, url): def _real_extract(self, url):
post_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, post_id) real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
entries = [ if info_dict:
self.url_result('facebook:%s' % video_id, FacebookIE.ie_key()) return info_dict
for video_id in self._parse_json(
self._search_regex(
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
webpage, 'video ids', group='ids'),
post_id)]
return self.playlist_result(entries, post_id) if '/posts/' in url:
entries = [
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
for vid in self._parse_json(
self._search_regex(
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
webpage, 'video ids', group='ids'),
video_id)]
return self.playlist_result(entries, video_id)
else:
_, info_dict = self._extract_from_url(
self._VIDEO_PAGE_TEMPLATE % video_id,
video_id, fatal_if_no_video=True)
return info_dict
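In the /posts/ branch the video ids live in an embedded JSON array; the extraction step in isolation (sketch, page fragment made up):

    import json
    import re

    page = '..."video_ids":[544765982287235,1035862816472149]...'
    ids = json.loads(re.search(
        r'(["\'])video_ids\1\s*:\s*(\[.+?\])', page).group(2))
    print(ids)  # [544765982287235, 1035862816472149]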


@ -17,7 +17,7 @@ from ..utils import (
class FC2IE(InfoExtractor): class FC2IE(InfoExtractor):
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)' _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
IE_NAME = 'fc2' IE_NAME = 'fc2'
_NETRC_MACHINE = 'fc2' _NETRC_MACHINE = 'fc2'
_TESTS = [{ _TESTS = [{


@ -4,7 +4,7 @@ from .common import InfoExtractor
class FirstpostIE(InfoExtractor): class FirstpostIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', 'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',


@ -8,7 +8,7 @@ from ..utils import int_or_none
class FirstTVIE(InfoExtractor): class FirstTVIE(InfoExtractor):
IE_NAME = '1tv' IE_NAME = '1tv'
IE_DESC = 'Первый канал' IE_DESC = 'Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.1tv.ru/videoarchive/73390', 'url': 'http://www.1tv.ru/videoarchive/73390',


@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
@ -16,12 +18,7 @@ from ..utils import (
class FiveMinIE(InfoExtractor): class FiveMinIE(InfoExtractor):
IE_NAME = '5min' IE_NAME = '5min'
_VALID_URL = r'''(?x) _VALID_URL = r'(?:5min:(?P<id>\d+)(?::(?P<sid>\d+))?|https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?P<query>.*))'
(?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=|
https?://(?:(?:massively|www)\.)?joystiq\.com/video/|
5min:)
(?P<id>\d+)
'''
_TESTS = [ _TESTS = [
{ {
@ -45,6 +42,7 @@ class FiveMinIE(InfoExtractor):
'title': 'How to Make a Next-Level Fruit Salad', 'title': 'How to Make a Next-Level Fruit Salad',
'duration': 184, 'duration': 184,
}, },
'skip': 'no longer available',
}, },
] ]
_ERRORS = { _ERRORS = {
@ -91,20 +89,33 @@ class FiveMinIE(InfoExtractor):
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
sid = mobj.group('sid')
if mobj.group('query'):
qs = compat_parse_qs(mobj.group('query'))
if not qs.get('playList'):
raise ExtractorError('Invalid URL', expected=True)
video_id = qs['playList'][0]
if qs.get('sid'):
sid = qs['sid'][0]
embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id embed_url = 'https://embed.5min.com/playerseed/?playList=%s' % video_id
embed_page = self._download_webpage(embed_url, video_id, if not sid:
'Downloading embed page') embed_page = self._download_webpage(embed_url, video_id,
sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid') 'Downloading embed page')
query = compat_urllib_parse.urlencode({ sid = self._search_regex(r'sid=(\d+)', embed_page, 'sid')
'func': 'GetResults',
'playlist': video_id,
'sid': sid,
'isPlayerSeed': 'true',
'url': embed_url,
})
response = self._download_json( response = self._download_json(
'https://syn.5min.com/handlers/SenseHandler.ashx?' + query, 'https://syn.5min.com/handlers/SenseHandler.ashx?' +
compat_urllib_parse.urlencode({
'func': 'GetResults',
'playlist': video_id,
'sid': sid,
'isPlayerSeed': 'true',
'url': embed_url,
}),
video_id) video_id)
if not response['success']: if not response['success']:
raise ExtractorError( raise ExtractorError(
@ -118,9 +129,7 @@ class FiveMinIE(InfoExtractor):
parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs( parsed_video_url = compat_urllib_parse_urlparse(compat_parse_qs(
compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0]) compat_urllib_parse_urlparse(info['EmbededURL']).query)['videoUrl'][0])
for rendition in info['Renditions']: for rendition in info['Renditions']:
if rendition['RenditionType'] == 'm3u8': if rendition['RenditionType'] == 'aac' or rendition['RenditionType'] == 'm3u8':
formats.extend(self._extract_m3u8_formats(rendition['Url'], video_id, m3u8_id='hls'))
elif rendition['RenditionType'] == 'aac':
continue continue
else: else:
rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType']))) rendition_url = compat_urlparse.urlunparse(parsed_video_url._replace(path=replace_extension(parsed_video_url.path.replace('//', '/%s/' % rendition['ID']), rendition['RenditionType'])))
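The PlayerSeed branch now reads playList (and, when present, sid) straight from the script URL's query string; compat_parse_qs returns a list per key, hence the [0] indexing (sketch):

    from youtube_dl.compat import compat_parse_qs

    qs = compat_parse_qs('playList=518013791&sid=577&autoStart=true')
    video_id = qs['playList'][0]
    sid = qs['sid'][0] if qs.get('sid') else None
    print(video_id, sid)  # 518013791 577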


@ -10,7 +10,7 @@ from ..utils import (
class FKTVIE(InfoExtractor): class FKTVIE(InfoExtractor):
IE_NAME = 'fernsehkritik.tv' IE_NAME = 'fernsehkritik.tv'
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?' _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
_TEST = { _TEST = {
'url': 'http://fernsehkritik.tv/folge-1', 'url': 'http://fernsehkritik.tv/folge-1',


@ -5,7 +5,7 @@ from .common import InfoExtractor
class FootyRoomIE(InfoExtractor): class FootyRoomIE(InfoExtractor):
_VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)' _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
'info_dict': { 'info_dict': {


@ -4,7 +4,7 @@ from .common import InfoExtractor
class FoxgayIE(InfoExtractor): class FoxgayIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
_TEST = { _TEST = {
'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
'md5': '80d72beab5d04e1655a56ad37afe6841', 'md5': '80d72beab5d04e1655a56ad37afe6841',


@ -36,6 +36,10 @@ class FoxNewsIE(AMPIE):
# 'upload_date': '20141204', # 'upload_date': '20141204',
'thumbnail': 're:^https?://.*\.jpg$', 'thumbnail': 're:^https?://.*\.jpg$',
}, },
'params': {
# m3u8 download
'skip_download': True,
},
}, },
{ {
'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com', 'url': 'http://video.foxnews.com/v/video-embed.html?video_id=3937480&d=video.foxnews.com',


@ -6,7 +6,7 @@ from ..utils import int_or_none
class FranceInterIE(InfoExtractor): class FranceInterIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.franceinter.fr/player/reecouter?play=793962', 'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
'md5': '4764932e466e6f6c79c317d2e74f6884', 'md5': '4764932e466e6f6c79c317d2e74f6884',


@ -60,28 +60,31 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
video_id, 'Downloading f4m manifest token', fatal=False) video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url: if f4m_url:
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id)) f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
video_id, f4m_id=format_id, fatal=False))
elif ext == 'm3u8': elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False))
elif video_url.startswith('rtmp'): elif video_url.startswith('rtmp'):
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'format_id': 'rtmp-%s' % format_id, 'format_id': 'rtmp-%s' % format_id,
'ext': 'flv', 'ext': 'flv',
'preference': 1,
}) })
else: else:
formats.append({ if self._is_valid_url(video_url, video_id, format_id):
'url': video_url, formats.append({
'format_id': format_id, 'url': video_url,
'preference': -1, 'format_id': format_id,
}) })
self._sort_formats(formats) self._sort_formats(formats)
title = info['titre'] title = info['titre']
subtitle = info.get('sous_titre') subtitle = info.get('sous_titre')
if subtitle: if subtitle:
title += ' - %s' % subtitle title += ' - %s' % subtitle
title = title.strip()
subtitles = {} subtitles = {}
subtitles_list = [{ subtitles_list = [{
@ -125,13 +128,13 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr' IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html' _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
'info_dict': { 'info_dict': {
'id': '84981923', 'id': '84981923',
'ext': 'flv', 'ext': 'mp4',
'title': 'Soir 3', 'title': 'Soir 3',
'upload_date': '20130826', 'upload_date': '20130826',
'timestamp': 1377548400, 'timestamp': 1377548400,
@ -139,6 +142,10 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
'fr': 'mincount:2', 'fr': 'mincount:2',
}, },
}, },
'params': {
# m3u8 downloads
'skip_download': True,
},
}, { }, {
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
'info_dict': { 'info_dict': {
@ -155,11 +162,32 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html', 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
'md5': 'f485bda6e185e7d15dbc69b72bae993e', 'md5': 'f485bda6e185e7d15dbc69b72bae993e',
'info_dict': { 'info_dict': {
'id': '556e03339473995ee145930c', 'id': 'NI_173343',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Les entreprises familiales : le secret de la réussite', 'title': 'Les entreprises familiales : le secret de la réussite',
'thumbnail': 're:^https?://.*\.jpe?g$', 'thumbnail': 're:^https?://.*\.jpe?g$',
} 'timestamp': 1433273139,
'upload_date': '20150602',
},
'params': {
# m3u8 downloads
'skip_download': True,
},
}, {
'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
'info_dict': {
'id': 'NI_657393',
'ext': 'mp4',
'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
'thumbnail': 're:^https?://.*\.jpe?g$',
'timestamp': 1458300695,
'upload_date': '20160318',
},
'params': {
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -172,7 +200,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
return self.url_result(dmcloud_url, 'DailymotionCloud') return self.url_result(dmcloud_url, 'DailymotionCloud')
video_id, catalogue = self._search_regex( video_id, catalogue = self._search_regex(
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@') (r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
webpage, 'video id').split('@')
return self._extract_video(video_id, catalogue) return self._extract_video(video_id, catalogue)
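Note: the tuple form passed to _search_regex relies on the helper trying each pattern in order and returning the first match. A minimal standalone sketch of that fallback behavior, using plain re and a hypothetical helper name (search_first) and markup fragment:

    import re

    def search_first(patterns, text):
        # Try each pattern in order; return the first capture that matches.
        for pattern in patterns:
            mobj = re.search(pattern, text)
            if mobj:
                return mobj.group(1)
        return None

    page = '<a href="//videos.francetv.fr/video/NI_173343@Infoweb">'  # hypothetical snippet
    video_id, catalogue = search_first(
        (r'id-video=([^@]+@[^"]+)',
         r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
        page).split('@')
    # video_id == 'NI_173343', catalogue == 'Infoweb'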

youtube_dl/extractor/freespeech.py

@@ -14,7 +14,7 @@ class FreespeechIE(InfoExtractor):
         'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0',
         'info_dict': {
             'id': 'poKsVCZ64uU',
-            'ext': 'mp4',
+            'ext': 'webm',
             'title': 'Obama, Romney Campaign in Colorado Ahead of Debate',
             'description': 'Obama, Romney Campaign in Colorado Ahead of Debate',
             'uploader': 'freespeechtv',

youtube_dl/extractor/freevideo.py

@@ -5,7 +5,7 @@ from ..utils import ExtractorError

 class FreeVideoIE(InfoExtractor):
-    _VALID_URL = r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
+    _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'

     _TEST = {
         'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',

youtube_dl/extractor/gameinformer.py

@@ -2,42 +2,27 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import int_or_none


 class GameInformerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?gameinformer\.com/(?:[^/]+/)*(?P<id>.+)\.aspx'
     _TEST = {
         'url': 'http://www.gameinformer.com/b/features/archive/2015/09/26/replay-animal-crossing.aspx',
+        'md5': '292f26da1ab4beb4c9099f1304d2b071',
         'info_dict': {
             'id': '4515472681001',
-            'ext': 'm3u8',
+            'ext': 'mp4',
             'title': 'Replay - Animal Crossing',
             'description': 'md5:2e211891b215c85d061adc7a4dd2d930',
-            'timestamp': 1443457610706,
-        },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
+            'timestamp': 1443457610,
+            'upload_date': '20150928',
+            'uploader_id': '694940074001',
         },
     }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'

     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        bc_api_url = self._search_regex(r"getVideo\('([^']+)'", webpage, 'brightcove api url')
-        json_data = self._download_json(
-            bc_api_url + '&video_fields=id,name,shortDescription,publishedDate,videoStillURL,length,IOSRenditions',
-            display_id)
-
-        return {
-            'id': compat_str(json_data['id']),
-            'display_id': display_id,
-            'url': json_data['IOSRenditions'][0]['url'],
-            'title': json_data['name'],
-            'description': json_data.get('shortDescription'),
-            'timestamp': int_or_none(json_data.get('publishedDate')),
-            'duration': int_or_none(json_data.get('length')),
-        }
+        brightcove_id = self._search_regex(r"getVideo\('[^']+video_id=(\d+)", webpage, 'brightcove id')
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
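Note: the rewrite swaps per-site JSON parsing for a hand-off to the BrightcoveNew extractor: only the video id is scraped, then a standard player URL is synthesized. A rough sketch of the pattern (regex and template taken from the hunk above; the page fragment is hypothetical):

    import re

    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/694940074001/default_default/index.html?videoId=%s'

    # Hypothetical fragment of a gameinformer.com page.
    page = "<script>getVideo('http://c.brightcove.com/viewer?playerID=x&video_id=4515472681001');</script>"

    brightcove_id = re.search(r"getVideo\('[^']+video_id=(\d+)", page).group(1)
    player_url = BRIGHTCOVE_URL_TEMPLATE % brightcove_id
    # player_url now points at the standard Brightcove player page,
    # which the BrightcoveNew extractor already knows how to handle.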

youtube_dl/extractor/gamekings.py

@@ -10,7 +10,7 @@ from .youtube import YoutubeIE

 class GamekingsIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
     _TESTS = [{
         # YouTube embed video
         'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',

youtube_dl/extractor/gamespot.py

@@ -14,7 +14,7 @@ from ..utils import (

 class GameSpotIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
+    _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
     _TESTS = [{
         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
         'md5': 'b2a30deaa8654fcccd43713a6b6a4825',

youtube_dl/extractor/gamestar.py

@@ -13,7 +13,7 @@ from ..utils import (

 class GameStarIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
+    _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
     _TEST = {
         'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
         'md5': '96974ecbb7fd8d0d20fca5a00810cea7',

youtube_dl/extractor/gametrailers.py

@@ -9,7 +9,7 @@ from ..utils import (

 class GametrailersIE(InfoExtractor):
-    _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
+    _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
     _TEST = {
         'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',

youtube_dl/extractor/generic.py

@@ -59,6 +59,7 @@ from .videomore import VideomoreIE
 from .googledrive import GoogleDriveIE
 from .jwplatform import JWPlatformIE
 from .digiteka import DigitekaIE
+from .instagram import InstagramIE


 class GenericIE(InfoExtractor):
@@ -239,6 +240,35 @@ class GenericIE(InfoExtractor):
                 'format': 'bestvideo',
             },
         },
+        # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
+        {
+            'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
+            'info_dict': {
+                'id': 'content',
+                'ext': 'mp4',
+                'title': 'content',
+                'formats': 'mincount:8',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
+        # m3u8 served with Content-Type: text/plain
+        {
+            'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
+            'info_dict': {
+                'id': 'index',
+                'ext': 'mp4',
+                'title': 'index',
+                'upload_date': '20140720',
+                'formats': 'mincount:11',
+            },
+            'params': {
+                # m3u8 downloads
+                'skip_download': True,
+            }
+        },
         # google redirect
         {
             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -1242,28 +1272,30 @@ class GenericIE(InfoExtractor):
         full_response = self._request_webpage(request, video_id)
         head_response = full_response

+        info_dict = {
+            'id': video_id,
+            'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
+        }
+
         # Check for direct link to a video
-        content_type = head_response.headers.get('Content-Type', '')
-        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type)
+        content_type = head_response.headers.get('Content-Type', '').lower()
+        m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
         if m:
-            upload_date = unified_strdate(
-                head_response.headers.get('Last-Modified'))
-            formats = []
-            if m.group('format_id').endswith('mpegurl'):
+            format_id = m.group('format_id')
+            if format_id.endswith('mpegurl'):
                 formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+            elif format_id == 'f4m':
+                formats = self._extract_f4m_formats(url, video_id)
             else:
                 formats = [{
                     'format_id': m.group('format_id'),
                     'url': url,
                     'vcodec': 'none' if m.group('type') == 'audio' else None
                 }]
-            return {
-                'id': video_id,
-                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
-                'direct': True,
-                'formats': formats,
-                'upload_date': upload_date,
-            }
+            info_dict['direct'] = True
+            info_dict['formats'] = formats
+            return info_dict

         if not self._downloader.params.get('test', False) and not is_intentional:
             force = self._downloader.params.get('force_generic_extractor', False)
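Note: besides lower-casing the header, the tightened regex stops the format id at `;` or whitespace, so a header like `audio/x-mpegURL; charset=utf-8` now passes the `endswith('mpegurl')` check (the old `.+$` tail swallowed the charset suffix). A quick check of the new pattern with example header values:

    import re

    pattern = r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)'

    for content_type in ('audio/x-mpegURL; charset=utf-8', 'video/mp4', 'text/plain'):
        m = re.match(pattern, content_type.lower())
        print(content_type, '->', m.group('format_id') if m else None)
    # audio/x-mpegURL; charset=utf-8 -> x-mpegurl
    # video/mp4 -> mp4
    # text/plain -> None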
@@ -1283,21 +1315,23 @@ class GenericIE(InfoExtractor):
         request.add_header('Accept-Encoding', '*')
         full_response = self._request_webpage(request, video_id)

+        first_bytes = full_response.read(512)
+
+        # Is it an M3U playlist?
+        if first_bytes.startswith(b'#EXTM3U'):
+            info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+            return info_dict
+
         # Maybe it's a direct link to a video?
         # Be careful not to download the whole thing!
-        first_bytes = full_response.read(512)
         if not is_html(first_bytes):
             self._downloader.report_warning(
                 'URL could be a direct video link, returning it as such.')
-            upload_date = unified_strdate(
-                head_response.headers.get('Last-Modified'))
-            return {
-                'id': video_id,
-                'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+            info_dict.update({
                 'direct': True,
                 'url': url,
-                'upload_date': upload_date,
-            }
+            })
+            return info_dict

         webpage = self._webpage_read_content(
             full_response, url, video_id, prefix=first_bytes)
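Note: sniffing the first 512 bytes for the `#EXTM3U` magic is what makes the `text/plain` m3u8 test above pass: the playlist is detected by content, not by Content-Type. A minimal self-contained sketch of the idea:

    import io

    def looks_like_m3u8(stream):
        # Read only the head of the response; an M3U playlist must start
        # with the #EXTM3U tag, so 512 bytes are plenty.
        first_bytes = stream.read(512)
        return first_bytes.startswith(b'#EXTM3U')

    print(looks_like_m3u8(io.BytesIO(b'#EXTM3U\n#EXT-X-VERSION:3\n')))  # True
    print(looks_like_m3u8(io.BytesIO(b'<!DOCTYPE html><html>')))        # False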
@@ -1314,12 +1348,12 @@ class GenericIE(InfoExtractor):
             elif doc.tag == '{http://xspf.org/ns/0/}playlist':
                 return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
             elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
-                return {
-                    'id': video_id,
-                    'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
-                    'formats': self._parse_mpd_formats(
-                        doc, video_id, mpd_base_url=url.rpartition('/')[0]),
-                }
+                info_dict['formats'] = self._parse_mpd_formats(
+                    doc, video_id, mpd_base_url=url.rpartition('/')[0])
+                return info_dict
+            elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
+                info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+                return info_dict
         except compat_xml_parse_error:
             pass
@@ -1876,6 +1910,19 @@ class GenericIE(InfoExtractor):
                 self._proto_relative_url(unescapeHTML(mobj.group(1))),
                 'AdobeTVVideo')

+        # Look for Vine embeds
+        mobj = re.search(
+            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
+            webpage)
+        if mobj is not None:
+            return self.url_result(
+                self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+
+        # Look for Instagram embeds
+        instagram_embed_url = InstagramIE._extract_embed_url(webpage)
+        if instagram_embed_url is not None:
+            return self.url_result(instagram_embed_url, InstagramIE.ie_key())
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
@@ -1985,6 +2032,8 @@ class GenericIE(InfoExtractor):
             entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
         elif ext == 'mpd':
             entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
+        elif ext == 'f4m':
+            entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
         else:
             entry_info_dict['url'] = video_url

youtube_dl/extractor/googledrive.py

@@ -10,8 +10,8 @@ from ..utils import (

 class GoogleDriveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28})'
-    _TEST = {
+    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
+    _TESTS = [{
         'url': 'https://drive.google.com/file/d/0ByeS4oOUV-49Zzh4R1J6R09zazQ/edit?pli=1',
         'md5': '881f7700aec4f538571fa1e0eed4a7b6',
         'info_dict': {
@@ -20,7 +20,11 @@ class GoogleDriveIE(InfoExtractor):
             'title': 'Big Buck Bunny.mp4',
             'duration': 46,
         }
-    }
+    }, {
+        # video id is longer than 28 characters
+        'url': 'https://drive.google.com/file/d/1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ/edit',
+        'only_matching': True,
+    }]
     _FORMATS_EXT = {
         '5': 'flv',
         '6': 'flv',
@@ -43,7 +47,7 @@ class GoogleDriveIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = re.search(
-            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28})',
+            r'<iframe[^>]+src="https?://(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/file/d/)(?P<id>[a-zA-Z0-9_-]{28,})',
             webpage)
         if mobj:
             return 'https://drive.google.com/file/d/%s' % mobj.group('id')
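Note: the only regex change here is `{28}` -> `{28,}`: Google Drive ids are no longer fixed at 28 characters, and the bounded quantifier matched only a prefix of longer ids. A quick demonstration:

    import re

    old = r'(?P<id>[a-zA-Z0-9_-]{28})'
    new = r'(?P<id>[a-zA-Z0-9_-]{28,})'

    long_id = '1ENcQ_jeCuj7y19s66_Ou9dRP4GKGsodiDQ'  # 35 chars, from the new test
    print(re.match(old, long_id).group('id'))  # matches, but only the first 28 chars
    print(re.match(new, long_id).group('id'))  # matches the full id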

youtube_dl/extractor/hbo.py (new file, 122 lines)

@@ -0,0 +1,122 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    xpath_text,
    xpath_element,
    int_or_none,
    parse_duration,
)


class HBOIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
        'md5': '1c33253f0c7782142c993c0ba62a8753',
        'info_dict': {
            'id': '1437839',
            'ext': 'mp4',
            'title': 'Ep. 64 Clip: Encryption',
        }
    }
    _FORMATS_INFO = {
        '1920': {
            'width': 1280,
            'height': 720,
        },
        '640': {
            'width': 768,
            'height': 432,
        },
        'highwifi': {
            'width': 640,
            'height': 360,
        },
        'high3g': {
            'width': 640,
            'height': 360,
        },
        'medwifi': {
            'width': 400,
            'height': 224,
        },
        'med3g': {
            'width': 400,
            'height': 224,
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        video_data = self._download_xml(
            'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
        title = xpath_text(video_data, 'title', 'title', True)

        formats = []
        for source in xpath_element(video_data, 'videos', 'sources', True):
            if source.tag == 'size':
                path = xpath_text(source, './/path')
                if not path:
                    continue
                width = source.attrib.get('width')
                format_info = self._FORMATS_INFO.get(width, {})
                height = format_info.get('height')
                fmt = {
                    'url': path,
                    'format_id': 'http%s' % ('-%dp' % height if height else ''),
                    'width': format_info.get('width'),
                    'height': height,
                }
                rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
                if rtmp:
                    fmt.update({
                        'url': rtmp.group('url'),
                        'play_path': rtmp.group('playpath'),
                        'app': rtmp.group('app'),
                        'ext': 'flv',
                        'format_id': fmt['format_id'].replace('http', 'rtmp'),
                    })
                formats.append(fmt)
            else:
                video_url = source.text
                if not video_url:
                    continue
                if source.tag == 'tarball':
                    formats.extend(self._extract_m3u8_formats(
                        video_url.replace('.tar', '/base_index_w8.m3u8'),
                        video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    format_info = self._FORMATS_INFO.get(source.tag, {})
                    formats.append({
                        'format_id': 'http-%s' % source.tag,
                        'url': video_url,
                        'width': format_info.get('width'),
                        'height': format_info.get('height'),
                    })
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        thumbnails = []
        card_sizes = xpath_element(video_data, 'titleCardSizes')
        if card_sizes is not None:
            for size in card_sizes:
                path = xpath_text(size, 'path')
                if not path:
                    continue
                width = int_or_none(size.get('width'))
                thumbnails.append({
                    'id': width,
                    'url': path,
                    'width': width,
                })

        return {
            'id': video_id,
            'title': title,
            'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
            'formats': formats,
            'thumbnails': thumbnails,
        }
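Note: in the new extractor, RTMP paths are split into connection URL, app and playpath before being handed to the RTMP downloader. A standalone check of that regex against a made-up path (the URL below is illustrative, not a real HBO endpoint):

    import re

    path = 'rtmpe://cp123.edgefcs.net/ondemand/videos/hbo/mp4:videos/hbo/clip_1437839.mp4'
    rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
    print(rtmp.group('url'))       # rtmpe://cp123.edgefcs.net/ondemand/videos/hbo
    print(rtmp.group('app'))       # ondemand/videos/hbo
    print(rtmp.group('playpath'))  # mp4:videos/hbo/clip_1437839.mp4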

youtube_dl/extractor/hotnewhiphop.py

@@ -12,7 +12,7 @@ from ..utils import (

 class HotNewHipHopIE(InfoExtractor):
-    _VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
+    _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
     _TEST = {
         'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
         'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',

youtube_dl/extractor/hypem.py

@@ -12,7 +12,7 @@ from ..utils import (

 class HypemIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
     _TEST = {
         'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
         'md5': 'b9cc91b5af8995e9f0c1cee04c575828',

youtube_dl/extractor/imdb.py

@@ -12,7 +12,7 @@ from ..utils import (
 class ImdbIE(InfoExtractor):
     IE_NAME = 'imdb'
     IE_DESC = 'Internet Movie Database trailers'
-    _VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'

     _TEST = {
         'url': 'http://www.imdb.com/video/imdb/vi2524815897',
@@ -70,7 +70,7 @@ class ImdbIE(InfoExtractor):
 class ImdbListIE(InfoExtractor):
     IE_NAME = 'imdb:list'
     IE_DESC = 'Internet Movie Database lists'
-    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
+    _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
     _TEST = {
         'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
         'info_dict': {

youtube_dl/extractor/indavideo.py

@@ -73,7 +73,7 @@ class IndavideoEmbedIE(InfoExtractor):
             'url': self._proto_relative_url(thumbnail)
         } for thumbnail in video.get('thumbnails', [])]

-        tags = [tag['title'] for tag in video.get('tags', [])]
+        tags = [tag['title'] for tag in video.get('tags') or []]

         return {
             'id': video.get('id') or video_id,
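Note: `video.get('tags', [])` -> `video.get('tags') or []` matters when the JSON field is present but null: `.get` with a default only kicks in when the key is missing entirely. Quick illustration:

    video = {'tags': None}  # field present, but null in the source JSON
    # video.get('tags', [])   -> None, and iterating over it raises TypeError
    # video.get('tags') or [] -> [], safe to iterate
    tags = [tag['title'] for tag in video.get('tags') or []]
    print(tags)  # []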

youtube_dl/extractor/instagram.py

@@ -4,8 +4,10 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    get_element_by_attribute,
     int_or_none,
     limit_length,
+    lowercase_escape,
 )
@@ -38,6 +40,18 @@ class InstagramIE(InfoExtractor):
         'only_matching': True,
     }]

+    @staticmethod
+    def _extract_embed_url(webpage):
+        blockquote_el = get_element_by_attribute(
+            'class', 'instagram-media', webpage)
+        if blockquote_el is None:
+            return
+
+        mobj = re.search(
+            r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
+        if mobj:
+            return mobj.group('link')
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -46,6 +60,8 @@ class InstagramIE(InfoExtractor):
             webpage, 'uploader id', fatal=False)
         desc = self._search_regex(
             r'"caption":"(.+?)"', webpage, 'description', default=None)
+        if desc is not None:
+            desc = lowercase_escape(desc)

         return {
             'id': video_id,
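Note: the new `_extract_embed_url` helper is what the generic extractor calls in its hunk above: it isolates the `<blockquote class="instagram-media">` wrapper and takes the first link inside it. A standalone sketch of the same two-step match (the markup below is a trimmed, hypothetical embed snippet; youtube-dl's real helper for step 1 is get_element_by_attribute):

    import re

    page = '''<blockquote class="instagram-media" data-instgrm-version="6">
    <a href="https://www.instagram.com/p/BA-pQFBG6fp/">A video</a></blockquote>'''

    # Step 1: isolate the embed wrapper.
    blockquote = re.search(
        r'<blockquote[^>]+class="instagram-media"[^>]*>(.*?)</blockquote>',
        page, re.DOTALL).group(1)
    # Step 2: the first href inside it is the canonical post URL.
    mobj = re.search(r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote)
    print(mobj.group('link'))  # https://www.instagram.com/p/BA-pQFBG6fp/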

youtube_dl/extractor/iprima.py

@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals

 import re
@@ -6,6 +6,8 @@ import time

 from .common import InfoExtractor
 from ..utils import (
+    determine_ext,
+    js_to_json,
     sanitized_Request,
 )
@@ -30,8 +32,7 @@ class IPrimaIE(InfoExtractor):
     }]

     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)

         webpage = self._download_webpage(url, video_id)
@@ -43,9 +44,42 @@ class IPrimaIE(InfoExtractor):
         req.add_header('Referer', url)
         playerpage = self._download_webpage(req, video_id, note='Downloading player')

-        m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
+        formats = []

-        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
+        def extract_formats(format_url, format_key=None, lang=None):
+            ext = determine_ext(format_url)
+            new_formats = []
+            if format_key == 'hls' or ext == 'm3u8':
+                new_formats = self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='hls', fatal=False)
+            elif format_key == 'dash' or ext == 'mpd':
+                return
+                new_formats = self._extract_mpd_formats(
+                    format_url, video_id, mpd_id='dash', fatal=False)
+            if lang:
+                for f in new_formats:
+                    if not f.get('language'):
+                        f['language'] = lang
+            formats.extend(new_formats)
+
+        options = self._parse_json(
+            self._search_regex(
+                r'(?s)var\s+playerOptions\s*=\s*({.+?});',
+                playerpage, 'player options', default='{}'),
+            video_id, transform_source=js_to_json, fatal=False)
+        if options:
+            for key, tracks in options.get('tracks', {}).items():
+                if not isinstance(tracks, list):
+                    continue
+                for track in tracks:
+                    src = track.get('src')
+                    if src:
+                        extract_formats(src, key.lower(), track.get('lang'))
+
+        if not formats:
+            for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
+                extract_formats(src)

         self._sort_formats(formats)
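Note: `extract_formats` is a closure that appends into the enclosing `formats` list, so the same dispatch (by explicit track key, or by URL extension as a fallback) serves both the structured `playerOptions` path and the raw regex fallback. A reduced sketch of that pattern (URLs hypothetical; DASH is skipped via an early return, just as the hunk above does):

    def collect_formats(sources):
        formats = []

        def extract_formats(format_url, format_key=None):
            # Dispatch on the explicit key if present, else on the extension.
            ext = format_url.rpartition('.')[2]
            if format_key == 'hls' or ext == 'm3u8':
                formats.append({'format_id': 'hls', 'url': format_url})
            elif format_key == 'dash' or ext == 'mpd':
                return  # DASH deliberately skipped, as in the commit

        for key, url in sources:
            extract_formats(url, key)
        return formats

    print(collect_formats([('hls', 'http://example.com/a.m3u8'),
                           ('dash', 'http://example.com/a.mpd')]))
    # [{'format_id': 'hls', 'url': 'http://example.com/a.m3u8'}]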

youtube_dl/extractor/iqiyi.py

@@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):
     IE_NAME = 'iqiyi'
     IE_DESC = '爱奇艺'

-    _VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html'
+    _VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'

     _NETRC_MACHINE = 'iqiyi'
@@ -501,7 +501,7 @@ class IqiyiIE(InfoExtractor):
     def get_enc_key(self, video_id):
         # TODO: automatic key extraction
         # last update at 2016-01-22 for Zombie::bite
-        enc_key = '6ab6d0280511493ba85594779759d4ed'
+        enc_key = '4a1caba4b4465345366f28da7c117d20'
         return enc_key

     def _extract_playlist(self, webpage):

youtube_dl/extractor/jadorecettepub.py

@@ -9,7 +9,7 @@ from .youtube import YoutubeIE

 class JadoreCettePubIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
     _TEST = {
         'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',

Some files were not shown because too many files have changed in this diff.