Merge remote-tracking branch 'upstream/master' into multipart_videos

This commit is contained in:
Mark Lee 2016-03-24 18:53:03 -07:00
commit 01c0a5f619
187 changed files with 1906 additions and 550 deletions

View File

@@ -163,3 +163,7 @@ Patrick Griffis
 Aidan Rowe
 mutantmonkey
 Ben Congdon
+Kacper Michajłow
+José Joaquín Atria
+Viťas Strádal
+Kagami Hiiragi

View File

@@ -12,15 +12,7 @@ SHAREDIR ?= $(PREFIX)/share
 PYTHON ?= /usr/bin/env python
 
 # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
-ifeq ($(PREFIX),/usr)
-SYSCONFDIR=/etc
-else
-ifeq ($(PREFIX),/usr/local)
-SYSCONFDIR=/etc
-else
-SYSCONFDIR=$(PREFIX)/etc
-endif
-endif
+SYSCONFDIR != if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi
 
 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
 	install -d $(DESTDIR)$(BINDIR)

View File

@@ -831,7 +831,7 @@ To run the test, simply invoke your favorite test runner, or execute a test file
 If you want to create a build of youtube-dl yourself, you'll need
 * python
-* make
+* make (both GNU make and BSD make are supported)
 * pandoc
 * zip
 * nosetests

View File

@@ -81,6 +81,7 @@
 - **BokeCC**
 - **Bpb**: Bundeszentrale für politische Bildung
 - **BR**: Bayerischer Rundfunk Mediathek
+- **BravoTV**
 - **Break**
 - **brightcove:legacy**
 - **brightcove:new**
@@ -499,6 +500,7 @@
 - **Restudy**
 - **ReverbNation**
 - **Revision3**
+- **RICE**
 - **RingTV**
 - **RottenTomatoes**
 - **Roxwel**
@@ -617,6 +619,7 @@
 - **ThePlatform**
 - **ThePlatformFeed**
 - **TheSixtyOne**
+- **TheStar**
 - **ThisAmericanLife**
 - **ThisAV**
 - **THVideo**
@@ -650,6 +653,7 @@
 - **tv.dfb.de**
 - **TV2**
 - **TV2Article**
+- **TV3**
 - **TV4**: tv4.se and tv4play.se
 - **TVC**
 - **TVCArticle**

View File

@@ -222,6 +222,11 @@ class TestFormatSelection(unittest.TestCase):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'dash-video-low')
 
+        ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
+        ydl.process_ie_result(info_dict.copy())
+        downloaded = ydl.downloaded_info_dicts[0]
+        self.assertEqual(downloaded['format_id'], 'dash-video-low')
+
         formats = [
             {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
         ]

View File

@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 from __future__ import unicode_literals
 
 # Allow direct execution
@@ -120,5 +121,14 @@ class TestProxy(unittest.TestCase):
         response = ydl.urlopen(req).read().decode('utf-8')
         self.assertEqual(response, 'cn: {0}'.format(url))
 
+    def test_proxy_with_idn(self):
+        ydl = YoutubeDL({
+            'proxy': 'localhost:{0}'.format(self.port),
+        })
+        url = 'http://中文.tw/'
+        response = ydl.urlopen(url).read().decode('utf-8')
+        # b'xn--fiq228c' is '中文'.encode('idna')
+        self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
+
 if __name__ == '__main__':
     unittest.main()
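
A quick check of the punycode expected by the new test, using Python's built-in idna codec:

    >>> '中文'.encode('idna')
    b'xn--fiq228c'

The same encoding explains the xn--e1aybc.xn--p1ai hostnames in the updated escape_url expectations further down.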

View File

@@ -28,6 +28,7 @@ from youtube_dl.utils import (
     encodeFilename,
     escape_rfc3986,
     escape_url,
+    extract_attributes,
     ExtractorError,
     find_xpath_attr,
     fix_xml_ampersands,
@@ -77,6 +78,7 @@ from youtube_dl.utils import (
     cli_bool_option,
 )
 from youtube_dl.compat import (
+    compat_chr,
     compat_etree_fromstring,
     compat_urlparse,
     compat_parse_qs,
@@ -575,11 +577,11 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(
             escape_url('http://тест.рф/фрагмент'),
-            'http://тест.рф/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
+            'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82'
         )
         self.assertEqual(
             escape_url('http://тест.рф/абв?абв=абв#абв'),
-            'http://тест.рф/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
+            'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2'
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
@@ -629,6 +631,44 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{"abc": "def",}')
         self.assertEqual(json.loads(on), {'abc': 'def'})
 
+    def test_extract_attributes(self):
+        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
+        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
+        self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
+        self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'})  # XML
+        self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
+        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
+        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
+        self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
+        self.assertEqual(extract_attributes('<e x="\'">'), {'x': "'"})
+        self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
+        self.assertEqual(extract_attributes('<e x >'), {'x': None})
+        self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
+        self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
+        self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
+        self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
+        self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
+        self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased
+        self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
+        self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
+        self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
+        self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
+        self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
+        # "Narrow" Python builds don't support unicode code points outside BMP.
+        try:
+            compat_chr(0x10000)
+            supports_outside_bmp = True
+        except ValueError:
+            supports_outside_bmp = False
+        if supports_outside_bmp:
+            self.assertEqual(extract_attributes('<e x="Smile &#128512;!">'), {'x': 'Smile \U0001f600!'})
+
     def test_clean_html(self):
         self.assertEqual(clean_html('a:\nb'), 'a: b')
         self.assertEqual(clean_html('a:\n "b"'), 'a: "b"')
@@ -662,6 +702,8 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_count('1.000'), 1000)
         self.assertEqual(parse_count('1.1k'), 1100)
         self.assertEqual(parse_count('1.1kk'), 1100000)
+        self.assertEqual(parse_count('1.1kk '), 1100000)
+        self.assertEqual(parse_count('1.1kk views'), 1100000)
 
     def test_version_tuple(self):
         self.assertEqual(version_tuple('1'), (1,))

View File

@@ -8,6 +8,6 @@ deps =
 passenv = HOME
 defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
     --exclude test_subtitles.py --exclude test_write_annotations.py
-    --exclude test_youtube_lists.py
+    --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
 commands = nosetests --verbose {posargs:{[testenv]defaultargs}}  # --with-coverage --cover-package=youtube_dl --cover-html
     # test.test_download:TestDownload.test_NowVideo

View File

@@ -906,7 +906,7 @@ class YoutubeDL(object):
             '*=': lambda attr, value: value in attr,
         }
         str_operator_rex = re.compile(r'''(?x)
-            \s*(?P<key>ext|acodec|vcodec|container|protocol)
+            \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
             \s*(?P<value>[a-zA-Z0-9._-]+)
             \s*$
@@ -1865,7 +1865,7 @@ class YoutubeDL(object):
         if fdict.get('language'):
             if res:
                 res += ' '
-            res += '[%s]' % fdict['language']
+            res += '[%s] ' % fdict['language']
         if fdict.get('format_note') is not None:
             res += fdict['format_note'] + ' '
         if fdict.get('tbr') is not None:

View File

@@ -144,14 +144,20 @@ def _real_main(argv=None):
         if numeric_limit is None:
             parser.error('invalid max_filesize specified')
         opts.max_filesize = numeric_limit
-    if opts.retries is not None:
-        if opts.retries in ('inf', 'infinite'):
-            opts_retries = float('inf')
+
+    def parse_retries(retries):
+        if retries in ('inf', 'infinite'):
+            parsed_retries = float('inf')
         else:
             try:
-                opts_retries = int(opts.retries)
+                parsed_retries = int(retries)
             except (TypeError, ValueError):
                 parser.error('invalid retry count specified')
+        return parsed_retries
+    if opts.retries is not None:
+        opts.retries = parse_retries(opts.retries)
+    if opts.fragment_retries is not None:
+        opts.fragment_retries = parse_retries(opts.fragment_retries)
     if opts.buffersize is not None:
         numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
         if numeric_buffersize is None:
@@ -301,7 +307,8 @@ def _real_main(argv=None):
         'force_generic_extractor': opts.force_generic_extractor,
         'ratelimit': opts.ratelimit,
         'nooverwrites': opts.nooverwrites,
-        'retries': opts_retries,
+        'retries': opts.retries,
+        'fragment_retries': opts.fragment_retries,
         'buffersize': opts.buffersize,
         'noresizebuffer': opts.noresizebuffer,
         'continuedl': opts.continue_dl,
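
The factored-out parse_retries helper accepts either a number or the words 'inf'/'infinite'; standalone, with parser.error swapped for a plain exception:

    def parse_retries(retries):
        if retries in ('inf', 'infinite'):
            return float('inf')
        try:
            return int(retries)
        except (TypeError, ValueError):
            raise ValueError('invalid retry count specified')

    assert parse_retries('inf') == float('inf')
    assert parse_retries('20') == 20

Both opts.retries and the new opts.fragment_retries go through it.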

View File

@@ -77,6 +77,11 @@ try:
 except ImportError:  # Python 2
     from urllib import urlretrieve as compat_urlretrieve
 
+try:
+    from html.parser import HTMLParser as compat_HTMLParser
+except ImportError:  # Python 2
+    from HTMLParser import HTMLParser as compat_HTMLParser
+
 try:
     from subprocess import DEVNULL
@@ -251,6 +256,16 @@ else:
                 el.text = el.text.decode('utf-8')
         return doc
 
+if sys.version_info < (2, 7):
+    # Here comes the crazy part: In 2.6, if the xpath is a unicode,
+    # .//node does not match if a node is a direct child of . !
+    def compat_xpath(xpath):
+        if isinstance(xpath, compat_str):
+            xpath = xpath.encode('ascii')
+        return xpath
+else:
+    compat_xpath = lambda xpath: xpath
+
 try:
     from urllib.parse import parse_qs as compat_parse_qs
 except ImportError:  # Python 2
@@ -543,6 +558,7 @@ else:
     from tokenize import generate_tokens as compat_tokenize_tokenize
 
 __all__ = [
+    'compat_HTMLParser',
     'compat_HTTPError',
     'compat_basestring',
     'compat_chr',
@@ -579,6 +595,7 @@ __all__ = [
     'compat_urlparse',
     'compat_urlretrieve',
     'compat_xml_parse_error',
+    'compat_xpath',
     'shlex_quote',
     'subprocess_check_output',
     'workaround_optparse_bug9161',
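
compat_xpath exists purely for Python 2.6, where passing a unicode xpath such as u'.//node' to ElementTree can fail to match direct children; encoding the expression to an ASCII byte string sidesteps the bug, and on every other version the function is an identity. Typical use is to wrap the xpath at the call site (the path below is illustrative):

    doc.find(compat_xpath('.//playlist/title'))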

View File

@@ -115,6 +115,10 @@ class FileDownloader(object):
             return '%10s' % '---b/s'
         return '%10s' % ('%s/s' % format_bytes(speed))
 
+    @staticmethod
+    def format_retries(retries):
+        return 'inf' if retries == float('inf') else '%.0f' % retries
+
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
@@ -297,7 +301,9 @@ class FileDownloader(object):
     def report_retry(self, count, retries):
         """Report retry in case of HTTP error 5xx"""
-        self.to_screen('[download] Got server HTTP error. Retrying (attempt %d of %.0f)...' % (count, retries))
+        self.to_screen(
+            '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
+            % (count, self.format_retries(retries)))
 
     def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""

View File

@@ -4,6 +4,7 @@ import os
 import re
 
 from .fragment import FragmentFD
+from ..compat import compat_urllib_error
 from ..utils import (
     sanitize_open,
     encodeFilename,
@@ -36,20 +37,41 @@ class DashSegmentsFD(FragmentFD):
         segments_filenames = []
 
-        def append_url_to_file(target_url, target_filename):
-            success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
-            if not success:
+        fragment_retries = self.params.get('fragment_retries', 0)
+
+        def append_url_to_file(target_url, tmp_filename, segment_name):
+            target_filename = '%s-%s' % (tmp_filename, segment_name)
+            count = 0
+            while count <= fragment_retries:
+                try:
+                    success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)})
+                    if not success:
+                        return False
+                    down, target_sanitized = sanitize_open(target_filename, 'rb')
+                    ctx['dest_stream'].write(down.read())
+                    down.close()
+                    segments_filenames.append(target_sanitized)
+                    break
+                except (compat_urllib_error.HTTPError, ) as err:
+                    # YouTube may often return 404 HTTP error for a fragment causing the
+                    # whole download to fail. However if the same fragment is immediately
+                    # retried with the same request data this usually succeeds (1-2 attempts
+                    # is usually enough) thus allowing to download the whole file successfully.
+                    # So, we will retry all fragments that fail with 404 HTTP error for now.
+                    if err.code != 404:
+                        raise
+                    # Retry fragment
+                    count += 1
+                    if count <= fragment_retries:
+                        self.report_retry_fragment(segment_name, count, fragment_retries)
+            if count > fragment_retries:
+                self.report_error('giving up after %s fragment retries' % fragment_retries)
                 return False
-            down, target_sanitized = sanitize_open(target_filename, 'rb')
-            ctx['dest_stream'].write(down.read())
-            down.close()
-            segments_filenames.append(target_sanitized)
 
         if initialization_url:
-            append_url_to_file(initialization_url, ctx['tmpfilename'] + '-Init')
+            append_url_to_file(initialization_url, ctx['tmpfilename'], 'Init')
         for i, segment_url in enumerate(segment_urls):
-            segment_filename = '%s-Seg%d' % (ctx['tmpfilename'], i)
-            append_url_to_file(segment_url, segment_filename)
+            append_url_to_file(segment_url, ctx['tmpfilename'], 'Seg%d' % i)
 
         self._finish_frag_download(ctx)
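
Stripped of the youtube-dl plumbing, the new fragment logic is a bounded retry loop that retries only on HTTP 404 and re-raises everything else; a self-contained sketch (fetch and the print calls are stand-ins for the real download and report methods):

    from urllib.error import HTTPError

    def download_fragment(fetch, name, fragment_retries=0):
        count = 0
        while count <= fragment_retries:
            try:
                fetch()  # download and append one fragment
                break
            except HTTPError as err:
                if err.code != 404:
                    raise
                count += 1  # 404s on fragments are often transient; retry
                if count <= fragment_retries:
                    print('Retrying fragment %s (%d of %d)...' % (name, count, fragment_retries))
        if count > fragment_retries:
            print('giving up after %d fragment retries' % fragment_retries)
            return False
        return True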

View File

@@ -19,8 +19,17 @@ class HttpQuietDownloader(HttpFD):
 class FragmentFD(FileDownloader):
     """
     A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
+
+    Available options:
+
+    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH only)
     """
 
+    def report_retry_fragment(self, fragment_name, count, retries):
+        self.to_screen(
+            '[download] Got server HTTP error. Retrying fragment %s (attempt %d of %s)...'
+            % (fragment_name, count, self.format_retries(retries)))
+
     def _prepare_and_start_frag_download(self, ctx):
         self._prepare_frag_download(ctx)
         self._start_frag_download(ctx)

View File

@@ -72,6 +72,7 @@ from .bet import BetIE
 from .bigflix import BigflixIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
+from .biobiochiletv import BioBioChileTVIE
 from .bleacherreport import (
     BleacherReportIE,
     BleacherReportCMSIE,
@@ -81,6 +82,7 @@ from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bpb import BpbIE
 from .br import BRIE
+from .bravotv import BravoTVIE
 from .breakcom import BreakIE
 from .brightcove import (
     BrightcoveLegacyIE,
@@ -107,6 +109,7 @@ from .cbsnews import (
 )
 from .cbssports import CBSSportsIE
 from .ccc import CCCIE
+from .cda import CDAIE
 from .ceskatelevize import CeskaTelevizeIE
 from .channel9 import Channel9IE
 from .chaturbate import ChaturbateIE
@@ -135,6 +138,7 @@ from .collegerama import CollegeRamaIE
 from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
 from .comcarcoff import ComCarCoffIE
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import RtmpIE
 from .condenast import CondeNastIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
@@ -282,6 +286,7 @@ from .goshgay import GoshgayIE
 from .gputechconf import GPUTechConfIE
 from .groupon import GrouponIE
 from .hark import HarkIE
+from .hbo import HBOIE
 from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
@@ -405,6 +410,7 @@ from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
 from .mlb import MLBIE
+from .mnet import MnetIE
 from .mpora import MporaIE
 from .moevideo import MoeVideoIE
 from .mofosex import MofosexIE
@@ -530,6 +536,7 @@ from .ooyala import (
     OoyalaIE,
     OoyalaExternalIE,
 )
+from .openload import OpenloadIE
 from .ora import OraTVIE
 from .orf import (
     ORFTVthekIE,
@@ -625,6 +632,7 @@ from .ruutu import RuutuIE
 from .sandia import SandiaIE
 from .safari import (
     SafariIE,
+    SafariApiIE,
     SafariCourseIE,
 )
 from .sapo import SapoIE
@@ -736,7 +744,9 @@ from .theplatform import (
     ThePlatformIE,
     ThePlatformFeedIE,
 )
+from .thescene import TheSceneIE
 from .thesixtyone import TheSixtyOneIE
+from .thestar import TheStarIE
 from .thisamericanlife import ThisAmericanLifeIE
 from .thisav import ThisAVIE
 from .tinypic import TinyPicIE
@@ -783,6 +793,7 @@ from .tv2 import (
     TV2IE,
     TV2ArticleIE,
 )
+from .tv3 import TV3IE
 from .tv4 import TV4IE
 from .tvc import (
     TVCIE,
@@ -950,7 +961,9 @@ from .youtube import (
     YoutubeChannelIE,
     YoutubeFavouritesIE,
     YoutubeHistoryIE,
+    YoutubeLiveIE,
     YoutubePlaylistIE,
+    YoutubePlaylistsIE,
     YoutubeRecommendedIE,
     YoutubeSearchDateIE,
     YoutubeSearchIE,
@@ -960,7 +973,6 @@ from .youtube import (
     YoutubeTruncatedIDIE,
     YoutubeTruncatedURLIE,
     YoutubeUserIE,
-    YoutubePlaylistsIE,
     YoutubeWatchLaterIE,
 )
 from .zapiks import ZapiksIE

View File

@@ -12,7 +12,7 @@ from ..utils import (
 
 class ABCIE(InfoExtractor):
     IE_NAME = 'abc.net.au'
-    _VALID_URL = r'http://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
+    _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
 
     _TESTS = [{
         'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',

View File

@@ -16,7 +16,7 @@ from ..utils import (
 
 class AddAnimeIE(InfoExtractor):
-    _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
+    _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
     _TESTS = [{
         'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
         'md5': '72954ea10bc979ab5e2eb288b21425a0',

View File

@@ -6,7 +6,7 @@ from ..utils import int_or_none
 
 class AftonbladetIE(InfoExtractor):
-    _VALID_URL = r'http://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://tv\.aftonbladet\.se/abtv/articles/(?P<id>[0-9]+)'
     _TEST = {
         'url': 'http://tv.aftonbladet.se/abtv/articles/36015',
         'info_dict': {

View File

@@ -4,7 +4,7 @@ from .common import InfoExtractor
 
 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'http://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
 
     _TEST = {
         'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
@@ -13,24 +13,18 @@ class AlJazeeraIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'The Slum - Episode 1: Deliverance',
             'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
-            'uploader': 'Al Jazeera English',
+            'uploader_id': '665003303001',
+            'timestamp': 1411116829,
+            'upload_date': '20140919',
         },
-        'add_ie': ['BrightcoveLegacy'],
+        'add_ie': ['BrightcoveNew'],
         'skip': 'Not accessible from Travis CI server',
     }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         program_name = self._match_id(url)
         webpage = self._download_webpage(url, program_name)
         brightcove_id = self._search_regex(
             r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
-
-        return {
-            '_type': 'url',
-            'url': (
-                'brightcove:'
-                'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
-                '&%40videoPlayer={0}'.format(brightcove_id)
-            ),
-            'ie_key': 'BrightcoveLegacy',
-        }
+        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)

View File

@@ -3,10 +3,14 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
+    compat_urlparse,
+    compat_str,
+)
 from ..utils import (
     determine_ext,
     encode_dict,
+    extract_attributes,
     ExtractorError,
     sanitized_Request,
     urlencode_postdata,
@@ -18,7 +22,7 @@ class AnimeOnDemandIE(InfoExtractor):
     _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in'
     _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply'
     _NETRC_MACHINE = 'animeondemand'
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.anime-on-demand.de/anime/161',
         'info_dict': {
             'id': '161',
@@ -26,7 +30,19 @@ class AnimeOnDemandIE(InfoExtractor):
             'description': 'md5:6681ce3c07c7189d255ac6ab23812d31',
         },
         'playlist_mincount': 4,
-    }
+    }, {
+        # Film wording is used instead of Episode
+        'url': 'https://www.anime-on-demand.de/anime/39',
+        'only_matching': True,
+    }, {
+        # Episodes without titles
+        'url': 'https://www.anime-on-demand.de/anime/162',
+        'only_matching': True,
+    }, {
+        # ger/jap, Dub/OmU, account required
+        'url': 'https://www.anime-on-demand.de/anime/169',
+        'only_matching': True,
+    }]
 
     def _login(self):
         (username, password) = self._get_login_info()
@@ -36,6 +52,10 @@ class AnimeOnDemandIE(InfoExtractor):
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
+        if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page:
+            self.raise_geo_restricted(
+                '%s is only available in German-speaking countries of Europe' % self.IE_NAME)
+
         login_form = self._form_hidden_inputs('new_user', login_page)
 
         login_form.update({
@@ -91,14 +111,22 @@ class AnimeOnDemandIE(InfoExtractor):
         entries = []
 
-        for episode_html in re.findall(r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage):
-            m = re.search(
-                r'class="episodebox-title"[^>]+title="Episode (?P<number>\d+) - (?P<title>.+?)"', episode_html)
-            if not m:
+        for num, episode_html in enumerate(re.findall(
+                r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', webpage), 1):
+            episodebox_title = self._search_regex(
+                (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1',
+                 r'class="episodebox-title"[^>]+>(?P<title>.+?)<'),
+                episode_html, 'episodebox title', default=None, group='title')
+            if not episodebox_title:
                 continue
 
-            episode_number = int(m.group('number'))
-            episode_title = m.group('title')
+            episode_number = int(self._search_regex(
+                r'(?:Episode|Film)\s*(\d+)',
+                episodebox_title, 'episode number', default=num))
+            episode_title = self._search_regex(
+                r'(?:Episode|Film)\s*\d+\s*-\s*(.+)',
+                episodebox_title, 'episode title', default=None)
+
             video_id = 'episode-%d' % episode_number
 
             common_info = {
@@ -110,33 +138,86 @@ class AnimeOnDemandIE(InfoExtractor):
             formats = []
 
-            playlist_url = self._search_regex(
-                r'data-playlist=(["\'])(?P<url>.+?)\1',
-                episode_html, 'data playlist', default=None, group='url')
-            if playlist_url:
-                request = sanitized_Request(
-                    compat_urlparse.urljoin(url, playlist_url),
-                    headers={
-                        'X-Requested-With': 'XMLHttpRequest',
-                        'X-CSRF-Token': csrf_token,
-                        'Referer': url,
-                        'Accept': 'application/json, text/javascript, */*; q=0.01',
-                    })
-
-                playlist = self._download_json(
-                    request, video_id, 'Downloading playlist JSON', fatal=False)
-                if playlist:
-                    playlist = playlist['playlist'][0]
-                    title = playlist['title']
-                    description = playlist.get('description')
-                    for source in playlist.get('sources', []):
-                        file_ = source.get('file')
-                        if file_ and determine_ext(file_) == 'm3u8':
-                            formats = self._extract_m3u8_formats(
-                                file_, video_id, 'mp4',
-                                entry_protocol='m3u8_native', m3u8_id='hls')
+            for input_ in re.findall(
+                    r'<input[^>]+class=["\'].*?streamstarter_html5[^>]+>', episode_html):
+                attributes = extract_attributes(input_)
+                playlist_urls = []
+                for playlist_key in ('data-playlist', 'data-otherplaylist'):
+                    playlist_url = attributes.get(playlist_key)
+                    if isinstance(playlist_url, compat_str) and re.match(
+                            r'/?[\da-zA-Z]+', playlist_url):
+                        playlist_urls.append(attributes[playlist_key])
+                if not playlist_urls:
+                    continue
+
+                lang = attributes.get('data-lang')
+                lang_note = attributes.get('value')
+
+                for playlist_url in playlist_urls:
+                    kind = self._search_regex(
+                        r'videomaterialurl/\d+/([^/]+)/',
+                        playlist_url, 'media kind', default=None)
+                    format_id_list = []
+                    if lang:
+                        format_id_list.append(lang)
+                    if kind:
+                        format_id_list.append(kind)
+                    if not format_id_list:
+                        format_id_list.append(compat_str(num))
+                    format_id = '-'.join(format_id_list)
+                    format_note = ', '.join(filter(None, (kind, lang_note)))
+                    request = sanitized_Request(
+                        compat_urlparse.urljoin(url, playlist_url),
+                        headers={
+                            'X-Requested-With': 'XMLHttpRequest',
+                            'X-CSRF-Token': csrf_token,
+                            'Referer': url,
+                            'Accept': 'application/json, text/javascript, */*; q=0.01',
+                        })
+                    playlist = self._download_json(
+                        request, video_id, 'Downloading %s playlist JSON' % format_id,
+                        fatal=False)
+                    if not playlist:
+                        continue
+                    start_video = playlist.get('startvideo', 0)
+                    playlist = playlist.get('playlist')
+                    if not playlist or not isinstance(playlist, list):
+                        continue
+                    playlist = playlist[start_video]
+                    title = playlist.get('title')
+                    if not title:
+                        continue
+                    description = playlist.get('description')
+                    for source in playlist.get('sources', []):
+                        file_ = source.get('file')
+                        if not file_:
+                            continue
+                        ext = determine_ext(file_)
+                        format_id_list = [lang, kind]
+                        if ext == 'm3u8':
+                            format_id_list.append('hls')
+                        elif source.get('type') == 'video/dash' or ext == 'mpd':
+                            format_id_list.append('dash')
+                        format_id = '-'.join(filter(None, format_id_list))
+                        if ext == 'm3u8':
+                            file_formats = self._extract_m3u8_formats(
+                                file_, video_id, 'mp4',
+                                entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False)
+                        elif source.get('type') == 'video/dash' or ext == 'mpd':
+                            continue
+                            file_formats = self._extract_mpd_formats(
+                                file_, video_id, mpd_id=format_id, fatal=False)
+                        else:
+                            continue
+                        for f in file_formats:
+                            f.update({
+                                'language': lang,
+                                'format_note': format_note,
+                            })
+                        formats.extend(file_formats)
 
             if formats:
+                self._sort_formats(formats)
                 f = common_info.copy()
                 f.update({
@@ -145,16 +226,18 @@ class AnimeOnDemandIE(InfoExtractor):
                 })
                 entries.append(f)
 
-            m = re.search(
-                r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
-                episode_html)
-            if m:
-                f = common_info.copy()
-                f.update({
-                    'id': '%s-teaser' % f['id'],
-                    'title': m.group('title'),
-                    'url': compat_urlparse.urljoin(url, m.group('href')),
-                })
-                entries.append(f)
+            # Extract teaser only when full episode is not available
+            if not formats:
+                m = re.search(
+                    r'data-dialog-header=(["\'])(?P<title>.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>Teaser<',
+                    episode_html)
+                if m:
+                    f = common_info.copy()
+                    f.update({
+                        'id': '%s-teaser' % f['id'],
+                        'title': m.group('title'),
+                        'url': compat_urlparse.urljoin(url, m.group('href')),
+                    })
+                    entries.append(f)
 
         return self.playlist_result(entries, anime_id, anime_title, anime_description)
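
The rewritten loop leans on the new extract_attributes helper (tested in test_utils.py above) to read the data-playlist, data-otherplaylist and data-lang attributes off each streamstarter input tag, then dash-joins the available tags into a format id, e.g.:

    >>> '-'.join(filter(None, ['ger', 'Dub', 'hls']))  # illustrative values
    'ger-Dub-hls'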

View File

@@ -5,7 +5,7 @@ from .common import InfoExtractor
 
 class AolIE(InfoExtractor):
     IE_NAME = 'on.aol.com'
-    _VALID_URL = r'(?:aol-video:|http://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
+    _VALID_URL = r'(?:aol-video:|https?://on\.aol\.com/video/.*-)(?P<id>[0-9]+)(?:$|\?)'
 
     _TESTS = [{
         'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
@@ -25,7 +25,7 @@ class AolIE(InfoExtractor):
 
 class AolFeaturesIE(InfoExtractor):
     IE_NAME = 'features.aol.com'
-    _VALID_URL = r'http://features\.aol\.com/video/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://features\.aol\.com/video/(?P<id>[^/?#]+)'
 
     _TESTS = [{
         'url': 'http://features.aol.com/video/behind-secret-second-careers-late-night-talk-show-hosts',

View File

@@ -23,7 +23,7 @@ from ..utils import (
 
 class ArteTvIE(InfoExtractor):
-    _VALID_URL = r'http://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
+    _VALID_URL = r'https?://videos\.arte\.tv/(?P<lang>fr|de|en|es)/.*-(?P<id>.*?)\.html'
     IE_NAME = 'arte.tv'
 
     def _real_extract(self, url):

View File

@@ -98,7 +98,7 @@ class AzubuIE(InfoExtractor):
 
 class AzubuLiveIE(InfoExtractor):
-    _VALID_URL = r'http://www.azubu.tv/(?P<id>[^/]+)$'
+    _VALID_URL = r'https?://www.azubu.tv/(?P<id>[^/]+)$'
 
     _TEST = {
         'url': 'http://www.azubu.tv/MarsTVMDLen',

View File

@@ -9,7 +9,7 @@ from ..utils import unescapeHTML
 
 class BaiduVideoIE(InfoExtractor):
     IE_DESC = '百度视频'
-    _VALID_URL = r'http://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
+    _VALID_URL = r'https?://v\.baidu\.com/(?P<type>[a-z]+)/(?P<id>\d+)\.htm'
     _TESTS = [{
         'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6',
         'info_dict': {

View File

@@ -942,7 +942,7 @@ class BBCIE(BBCCoUkIE):
 
 class BBCCoUkArticleIE(InfoExtractor):
-    _VALID_URL = 'http://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
     IE_NAME = 'bbc.co.uk:article'
     IE_DESC = 'BBC articles'

View File

@@ -8,7 +8,7 @@ from ..utils import url_basename
 
 class BehindKinkIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
+    _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P<year>[0-9]{4})/(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<id>[^/#?_]+)'
     _TEST = {
         'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/',
         'md5': '507b57d8fdcd75a41a9a7bdb7989c762',

View File

@@ -14,7 +14,7 @@ from ..utils import (
 
 class BiliBiliIE(InfoExtractor):
-    _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
+    _VALID_URL = r'https?://www\.bilibili\.(?:tv|com)/video/av(?P<id>\d+)(?:/index_(?P<page_num>\d+).html)?'
 
     _TESTS = [{
         'url': 'http://www.bilibili.tv/video/av1074402/',

View File

@@ -0,0 +1,86 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import remove_end
+
+
+class BioBioChileTVIE(InfoExtractor):
+    _VALID_URL = r'https?://tv\.biobiochile\.cl/notas/(?:[^/]+/)+(?P<id>[^/]+)\.shtml'
+
+    _TESTS = [{
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml',
+        'md5': '26f51f03cf580265defefb4518faec09',
+        'info_dict': {
+            'id': 'sobre-camaras-y-camarillas-parlamentarias',
+            'ext': 'mp4',
+            'title': 'Sobre Cámaras y camarillas parlamentarias',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Fernando Atria',
+        },
+    }, {
+        # different uploader layout
+        'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml',
+        'md5': 'edc2e6b58974c46d5b047dea3c539ff3',
+        'info_dict': {
+            'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades',
+            'ext': 'mp4',
+            'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'uploader': 'Piangella Obrador',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml',
+        'only_matching': True,
+    }, {
+        'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV')
+
+        file_url = self._search_regex(
+            r'loadFWPlayerVideo\([^,]+,\s*(["\'])(?P<url>.+?)\1',
+            webpage, 'file url', group='url')
+
+        base_url = self._search_regex(
+            r'file\s*:\s*(["\'])(?P<url>.+?)\1\s*\+\s*fileURL', webpage,
+            'base url', default='http://unlimited2-cl.digitalproserver.com/bbtv/',
+            group='url')
+
+        formats = self._extract_m3u8_formats(
+            '%s%s/playlist.m3u8' % (base_url, file_url), video_id, 'mp4',
+            entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+        f = {
+            'url': '%s%s' % (base_url, file_url),
+            'format_id': 'http',
+            'protocol': 'http',
+            'preference': 1,
+        }
+        if formats:
+            f_copy = formats[-1].copy()
+            f_copy.update(f)
+            f = f_copy
+        formats.append(f)
+        self._sort_formats(formats)
+
+        thumbnail = self._og_search_thumbnail(webpage)
+        uploader = self._html_search_regex(
+            r'<a[^>]+href=["\']https?://busca\.biobiochile\.cl/author[^>]+>(.+?)</a>',
+            webpage, 'uploader', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'uploader': uploader,
+            'formats': formats,
+        }

View File

@@ -33,7 +33,7 @@ class BokeCCBaseIE(InfoExtractor):
 
 class BokeCCIE(BokeCCBaseIE):
     _IE_DESC = 'CC视频'
-    _VALID_URL = r'http://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
+    _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)'
 
     _TESTS = [{
         'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B',

View File

@@ -12,7 +12,7 @@ from ..utils import (
 
 class BpbIE(InfoExtractor):
     IE_DESC = 'Bundeszentrale für politische Bildung'
-    _VALID_URL = r'http://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P<id>[0-9]+)/'
 
     _TEST = {
         'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',

View File

@@ -0,0 +1,28 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class BravoTVIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+videos/(?P<id>[^/?]+)'
+    _TEST = {
+        'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale',
+        'md5': 'd60cdf68904e854fac669bd26cccf801',
+        'info_dict': {
+            'id': 'LitrBdX64qLn',
+            'ext': 'mp4',
+            'title': 'Last Chance Kitchen Returns',
+            'description': 'S13: Last Chance Kitchen Returns for Top Chef Season 13',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        account_pid = self._search_regex(r'"account_pid"\s*:\s*"([^"]+)"', webpage, 'account pid')
+        release_pid = self._search_regex(r'"release_pid"\s*:\s*"([^"]+)"', webpage, 'release pid')
+        return self.url_result(smuggle_url(
+            'http://link.theplatform.com/s/%s/%s?mbr=true&switch=progressive' % (account_pid, release_pid),
+            {'force_smil_url': True}), 'ThePlatform', release_pid)

View File

@@ -11,7 +11,7 @@ from ..utils import (
 
 class BreakIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P<id>\d+)'
     _TESTS = [{
         'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056',
         'info_dict': {

View File

@@ -9,7 +9,6 @@ from ..compat import (
     compat_etree_fromstring,
     compat_parse_qs,
     compat_str,
-    compat_urllib_parse,
     compat_urllib_parse_urlparse,
     compat_urlparse,
     compat_xml_parse_error,
@@ -24,16 +23,16 @@ from ..utils import (
     js_to_json,
     int_or_none,
     parse_iso8601,
-    sanitized_Request,
     unescapeHTML,
     unsmuggle_url,
+    update_url_query,
 )
 
 
 class BrightcoveLegacyIE(InfoExtractor):
     IE_NAME = 'brightcove:legacy'
     _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
-    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
+    _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated'
 
     _TESTS = [
         {
@@ -156,8 +155,8 @@ class BrightcoveLegacyIE(InfoExtractor):
         # Not all pages define this value
         if playerKey is not None:
             params['playerKey'] = playerKey
-        # The three fields hold the id of the video
-        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID')
+        # These fields hold the id of the video
+        videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
         if videoPlayer is not None:
             params['@videoPlayer'] = videoPlayer
         linkBase = find_param('linkBaseURL')
@@ -185,8 +184,7 @@ class BrightcoveLegacyIE(InfoExtractor):
     @classmethod
     def _make_brightcove_url(cls, params):
-        data = compat_urllib_parse.urlencode(params)
-        return cls._FEDERATED_URL_TEMPLATE % data
+        return update_url_query(cls._FEDERATED_URL, params)
 
     @classmethod
     def _extract_brightcove_url(cls, webpage):
@@ -240,7 +238,7 @@ class BrightcoveLegacyIE(InfoExtractor):
             # We set the original url as the default 'Referer' header
             referer = smuggled_data.get('Referer', url)
             return self._get_video_info(
-                videoPlayer[0], query_str, query, referer=referer)
+                videoPlayer[0], query, referer=referer)
         elif 'playerKey' in query:
             player_key = query['playerKey']
             return self._get_playlist_info(player_key[0])
@@ -249,15 +247,14 @@ class BrightcoveLegacyIE(InfoExtractor):
                 'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
                 expected=True)
 
-    def _get_video_info(self, video_id, query_str, query, referer=None):
-        request_url = self._FEDERATED_URL_TEMPLATE % query_str
-        req = sanitized_Request(request_url)
+    def _get_video_info(self, video_id, query, referer=None):
+        headers = {}
         linkBase = query.get('linkBaseURL')
         if linkBase is not None:
            referer = linkBase[0]
         if referer is not None:
-            req.add_header('Referer', referer)
-        webpage = self._download_webpage(req, video_id)
+            headers['Referer'] = referer
+        webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query)
 
         error_msg = self._html_search_regex(
             r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
@@ -415,8 +412,8 @@ class BrightcoveNewIE(InfoExtractor):
         # Look for iframe embeds [1]
         for _, url in re.findall(
-                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
-            entries.append(url)
+                r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+            entries.append(url if url.startswith('http') else 'http:' + url)
 
         # Look for embed_in_page embeds [2]
         for video_id, account_id, player_id, embed in re.findall(
@@ -425,7 +422,7 @@ class BrightcoveNewIE(InfoExtractor):
             # According to [4] data-video-id may be prefixed with ref:
             r'''(?sx)
                 <video[^>]+
-                    data-video-id=["\']((?:ref:)?\d+)["\'][^>]*>.*?
+                    data-video-id=["\'](\d+|ref:[^"\']+)["\'][^>]*>.*?
                 </video>.*?
                 <script[^>]+
                     src=["\'](?:https?:)?//players\.brightcove\.net/
@@ -459,12 +456,11 @@ class BrightcoveNewIE(InfoExtractor):
             r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
             webpage, 'policy key', group='pk')
 
-        req = sanitized_Request(
-            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
-            % (account_id, video_id),
-            headers={'Accept': 'application/json;pk=%s' % policy_key})
+        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id)
         try:
-            json_data = self._download_json(req, video_id)
+            json_data = self._download_json(api_url, video_id, headers={
+                'Accept': 'application/json;pk=%s' % policy_key
+            })
         except ExtractorError as e:
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                 json_data = self._parse_json(e.cause.read().decode(), video_id)
@@ -482,8 +478,11 @@ class BrightcoveNewIE(InfoExtractor):
                 if not src:
                     continue
                 formats.extend(self._extract_m3u8_formats(
-                    src, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
+                    src, video_id, 'mp4', m3u8_id='hls', fatal=False))
+            elif source_type == 'application/dash+xml':
+                if not src:
+                    continue
+                formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
             else:
                 streaming_src = source.get('streaming_src')
                 stream_name, app_name = source.get('stream_name'), source.get('app_name')
@@ -491,15 +490,23 @@ class BrightcoveNewIE(InfoExtractor):
                     continue
                 tbr = float_or_none(source.get('avg_bitrate'), 1000)
                 height = int_or_none(source.get('height'))
+                width = int_or_none(source.get('width'))
                 f = {
                     'tbr': tbr,
-                    'width': int_or_none(source.get('width')),
-                    'height': height,
                     'filesize': int_or_none(source.get('size')),
                     'container': container,
-                    'vcodec': source.get('codec'),
-                    'ext': source.get('container').lower(),
+                    'ext': container.lower(),
                 }
+                if width == 0 and height == 0:
+                    f.update({
+                        'vcodec': 'none',
+                    })
+                else:
+                    f.update({
+                        'width': width,
+                        'height': height,
+                        'vcodec': source.get('codec'),
+                    })
 
                 def build_format_id(kind):
                     format_id = kind
View File

@@ -16,7 +16,7 @@ from ..utils import (
 
 class CamdemyIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
     _TESTS = [{
         # single file
         'url': 'http://www.camdemy.com/media/5181/',
@@ -104,7 +104,7 @@ class CamdemyIE(InfoExtractor):
 
 class CamdemyFolderIE(InfoExtractor):
-    _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)'
+    _VALID_URL = r'https?://www.camdemy.com/folder/(?P<id>\d+)'
     _TESTS = [{
         # links with trailing slash
         'url': 'http://www.camdemy.com/folder/450',

View File

@@ -11,7 +11,7 @@ from ..utils import (
 
 class CBSNewsIE(ThePlatformIE):
     IE_DESC = 'CBS News'
-    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
 
     _TESTS = [
         {
@@ -78,7 +78,7 @@ class CBSNewsIE(ThePlatformIE):
                 pid = item.get('media' + format_id)
                 if not pid:
                     continue
-                release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?format=SMIL&mbr=true' % pid
+                release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid
                 tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid)
                 formats.extend(tp_formats)
                 subtitles = self._merge_subtitles(subtitles, tp_subtitles)
@@ -96,7 +96,7 @@ class CBSNewsIE(ThePlatformIE):
 
 class CBSNewsLiveVideoIE(InfoExtractor):
     IE_DESC = 'CBS News Live Videos'
-    _VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
 
     _TEST = {
         'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',

View File

@@ -6,7 +6,7 @@ from .common import InfoExtractor
 
 class CBSSportsIE(InfoExtractor):
-    _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)'
     _TEST = {
         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s',

96 youtube_dl/extractor/cda.py Executable file
View File

@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    decode_packed_codes,
+    ExtractorError,
+    parse_duration
+)
+
+
+class CDAIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
+    _TESTS = [{
+        'url': 'http://www.cda.pl/video/5749950c',
+        'md5': '6f844bf51b15f31fae165365707ae970',
+        'info_dict': {
+            'id': '5749950c',
+            'ext': 'mp4',
+            'height': 720,
+            'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
+            'duration': 39
+        }
+    }, {
+        'url': 'http://www.cda.pl/video/57413289',
+        'md5': 'a88828770a8310fc00be6c95faf7f4d5',
+        'info_dict': {
+            'id': '57413289',
+            'ext': 'mp4',
+            'title': 'Lądowanie na lotnisku na Maderze',
+            'duration': 137
+        }
+    }, {
+        'url': 'http://ebd.cda.pl/0x0/5749950c',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id)
+
+        if 'Ten film jest dostępny dla użytkowników premium' in webpage:
+            raise ExtractorError('This video is only available for premium users.', expected=True)
+
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+
+        formats = []
+
+        info_dict = {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'duration': None,
+        }
+
+        def extract_format(page, version):
+            unpacked = decode_packed_codes(page)
+            format_url = self._search_regex(
+                r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False)
+            if not format_url:
+                return
+            f = {
+                'url': format_url,
+            }
+            m = re.search(
+                r'<a[^>]+data-quality="(?P<format_id>[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P<height>[0-9]+)p',
+                page)
+            if m:
+                f.update({
+                    'format_id': m.group('format_id'),
+                    'height': int(m.group('height')),
+                })
+            info_dict['formats'].append(f)
+            if not info_dict['duration']:
+                info_dict['duration'] = parse_duration(self._search_regex(
+                    r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False))
+
+        extract_format(webpage, 'default')
+
+        for href, resolution in re.findall(
+                r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
+                webpage):
+            webpage = self._download_webpage(
+                href, video_id, 'Downloading %s version information' % resolution, fatal=False)
+            if not webpage:
+                # Manually report warning because empty page is returned when
+                # invalid version is requested.
+                self.report_warning('Unable to download %s version information' % resolution)
+                continue
+
+            extract_format(webpage, resolution)
+
+        self._sort_formats(formats)
+
+        return info_dict

View File

@@ -129,7 +129,8 @@ class CeskaTelevizeIE(InfoExtractor):
         formats = []
         for format_id, stream_url in item['streamUrls'].items():
             formats.extend(self._extract_m3u8_formats(
-                stream_url, playlist_id, 'mp4', entry_protocol='m3u8_native'))
+                stream_url, playlist_id, 'mp4',
+                entry_protocol='m3u8_native', fatal=False))
         self._sort_formats(formats)
 
         item_id = item.get('id') or item['assetId']

View File

@ -19,7 +19,7 @@ def _decode(s):
class CliphunterIE(InfoExtractor): class CliphunterIE(InfoExtractor):
IE_NAME = 'cliphunter' IE_NAME = 'cliphunter'
_VALID_URL = r'''(?x)http://(?:www\.)?cliphunter\.com/w/ _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/
(?P<id>[0-9]+)/ (?P<id>[0-9]+)/
(?P<seo>.+?)(?:$|[#\?]) (?P<seo>.+?)(?:$|[#\?])
''' '''

View File

@ -8,7 +8,7 @@ from ..utils import (
class ClipsyndicateIE(InfoExtractor): class ClipsyndicateIE(InfoExtractor):
_VALID_URL = r'http://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)' _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe',

View File

@ -12,7 +12,7 @@ from ..utils import (
class ClubicIE(InfoExtractor): class ClubicIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', 'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html',

View File

@ -60,7 +60,7 @@ class CNETIE(ThePlatformIE):
for (fkey, vid) in vdata['files'].items(): for (fkey, vid) in vdata['files'].items():
if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']: if fkey == 'hls_phone' and 'hls_tablet' in vdata['files']:
continue continue
release_url = 'http://link.theplatform.com/s/kYEXFC/%s?format=SMIL&mbr=true' % vid release_url = 'http://link.theplatform.com/s/kYEXFC/%s?mbr=true' % vid
if fkey == 'hds': if fkey == 'hds':
release_url += '&manifest=f4m' release_url += '&manifest=f4m'
tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey) tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % fkey)

View File

@ -11,7 +11,7 @@ from ..utils import (
class ComCarCoffIE(InfoExtractor): class ComCarCoffIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)' _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
_TESTS = [{ _TESTS = [{
'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/', 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
'info_dict': { 'info_dict': {

View File

@ -866,6 +866,7 @@ class InfoExtractor(object):
proto_preference = 0 if 'url' in f and determine_protocol(f) in ['http', 'https'] else -0.1 proto_preference = 0 if 'url' in f and determine_protocol(f) in ['http', 'https'] else -0.1
if f.get('vcodec') == 'none': # audio only if f.get('vcodec') == 'none': # audio only
preference -= 50
if self._downloader.params.get('prefer_free_formats'): if self._downloader.params.get('prefer_free_formats'):
ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus'] ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
else: else:
@ -876,6 +877,8 @@ class InfoExtractor(object):
except ValueError: except ValueError:
audio_ext_preference = -1 audio_ext_preference = -1
else: else:
if f.get('acodec') == 'none': # video only
preference -= 40
if self._downloader.params.get('prefer_free_formats'): if self._downloader.params.get('prefer_free_formats'):
ORDER = ['flv', 'mp4', 'webm'] ORDER = ['flv', 'mp4', 'webm']
else: else:
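
# A toy illustration (assumed names, not library code) of the effect of the
# two penalties introduced above: audio-only formats lose 50 points and
# video-only formats lose 40, so an otherwise comparable muxed format wins.
def penalty(f):
    if f.get('vcodec') == 'none':  # audio only
        return -50
    if f.get('acodec') == 'none':  # video only
        return -40
    return 0

formats = [
    {'format_id': 'audio', 'vcodec': 'none'},
    {'format_id': 'video', 'acodec': 'none'},
    {'format_id': 'muxed'},
]
print(max(formats, key=penalty)['format_id'])  # -> muxed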

View File

@ -0,0 +1,36 @@
from __future__ import unicode_literals
import os
from .common import InfoExtractor
from ..compat import (
compat_urllib_parse_unquote,
compat_urlparse,
)
from ..utils import url_basename
class RtmpIE(InfoExtractor):
IE_DESC = False # Do not list
_VALID_URL = r'(?i)rtmp[est]?://.+'
_TESTS = [{
'url': 'rtmp://cp44293.edgefcs.net/ondemand?auth=daEcTdydfdqcsb8cZcDbAaCbhamacbbawaS-bw7dBb-bWG-GqpGFqCpNCnGoyL&aifp=v001&slist=public/unsecure/audio/2c97899446428e4301471a8cb72b4b97--audio--pmg-20110908-0900a_flv_aac_med_int.mp4',
'only_matching': True,
}, {
'url': 'rtmp://edge.live.hitbox.tv/live/dimak',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
title = compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
return {
'id': video_id,
'title': title,
'formats': [{
'url': url,
'ext': 'flv',
'format_id': compat_urlparse.urlparse(url).scheme,
}],
}
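
# Worked example (standalone, reusing a URL from the tests above) of how
# RtmpIE derives its fields from a raw rtmp:// URL.
import os
from youtube_dl.compat import compat_urllib_parse_unquote, compat_urlparse
from youtube_dl.utils import url_basename

url = 'rtmp://edge.live.hitbox.tv/live/dimak'
print(compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0]))  # id: dimak
print(compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]))  # title: dimak
print(compat_urlparse.urlparse(url).scheme)  # format_id: rtmp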

View File

@ -45,7 +45,7 @@ class CondeNastIE(InfoExtractor):
'wmagazine': 'W Magazine', 'wmagazine': 'W Magazine',
} }
_VALID_URL = r'http://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys()) _VALID_URL = r'https?://(?:video|www|player)\.(?P<site>%s)\.com/(?P<type>watch|series|video|embed(?:js)?)/(?P<id>[^/?#]+)' % '|'.join(_SITES.keys())
IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values())) IE_DESC = 'Condé Nast media group: %s' % ', '.join(sorted(_SITES.values()))
EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys()) EMBED_URL = r'(?:https?:)?//player\.(?P<site>%s)\.com/(?P<type>embed(?:js)?)/.+?' % '|'.join(_SITES.keys())

View File

@ -54,7 +54,7 @@ class CrunchyrollBaseIE(InfoExtractor):
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): def _download_webpage(self, url_or_request, *args, **kwargs):
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
else sanitized_Request(url_or_request)) else sanitized_Request(url_or_request))
# Accept-Language must be set explicitly to accept any language to avoid issues # Accept-Language must be set explicitly to accept any language to avoid issues
@ -65,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor):
# Crunchyroll to not work in georestriction cases in some browsers that don't place # Crunchyroll to not work in georestriction cases in some browsers that don't place
# the locale lang first in header. However allowing any language seems to workaround the issue. # the locale lang first in header. However allowing any language seems to workaround the issue.
request.add_header('Accept-Language', '*') request.add_header('Accept-Language', '*')
return super(CrunchyrollBaseIE, self)._download_webpage( return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
request, video_id, note, errnote, fatal, tries, timeout, encoding)
@staticmethod @staticmethod
def _add_skip_wall(url): def _add_skip_wall(url):

View File

@ -15,7 +15,7 @@ from .senateisvp import SenateISVPIE
class CSpanIE(InfoExtractor): class CSpanIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)' _VALID_URL = r'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
IE_DESC = 'C-SPAN' IE_DESC = 'C-SPAN'
_TESTS = [{ _TESTS = [{
'url': 'http://www.c-span.org/video/?313572-1/HolderonV', 'url': 'http://www.c-span.org/video/?313572-1/HolderonV',

View File

@ -8,7 +8,7 @@ from ..utils import parse_iso8601, ExtractorError
class CtsNewsIE(InfoExtractor): class CtsNewsIE(InfoExtractor):
IE_DESC = '華視新聞' IE_DESC = '華視新聞'
# https connection failed (Connection reset) # https connection failed (Connection reset)
_VALID_URL = r'http://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html' _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html', 'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
'md5': 'a9875cb790252b08431186d741beaabe', 'md5': 'a9875cb790252b08431186d741beaabe',

View File

@ -6,7 +6,7 @@ from ..compat import compat_str
class DctpTvIE(InfoExtractor): class DctpTvIE(InfoExtractor):
_VALID_URL = r'http://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$' _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P<id>.+?)/$'
_TEST = { _TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
'info_dict': { 'info_dict': {

View File

@ -5,7 +5,7 @@ from .common import InfoExtractor
class DefenseGouvFrIE(InfoExtractor): class DefenseGouvFrIE(InfoExtractor):
IE_NAME = 'defense.gouv.fr' IE_NAME = 'defense.gouv.fr'
_VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)' _VALID_URL = r'https?://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'
_TEST = { _TEST = {
'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', 'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1',

View File

@ -9,7 +9,7 @@ from ..compat import compat_str
class DiscoveryIE(InfoExtractor): class DiscoveryIE(InfoExtractor):
_VALID_URL = r'''(?x)http://(?:www\.)?(?: _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
discovery| discovery|
investigationdiscovery| investigationdiscovery|
discoverylife| discoverylife|

View File

@ -10,7 +10,7 @@ from ..compat import (compat_str, compat_basestring)
class DouyuTVIE(InfoExtractor): class DouyuTVIE(InfoExtractor):
IE_DESC = '斗鱼' IE_DESC = '斗鱼'
_VALID_URL = r'http://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)' _VALID_URL = r'https?://(?:www\.)?douyutv\.com/(?P<id>[A-Za-z0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.douyutv.com/iseven', 'url': 'http://www.douyutv.com/iseven',
'info_dict': { 'info_dict': {

View File

@ -10,7 +10,7 @@ from ..utils import int_or_none
class DPlayIE(InfoExtractor): class DPlayIE(InfoExtractor):
_VALID_URL = r'http://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?P<domain>it\.dplay\.com|www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/', 'url': 'http://it.dplay.com/take-me-out/stagione-1-episodio-25/',

View File

@ -7,7 +7,7 @@ from .zdf import ZDFIE
class DreiSatIE(ZDFIE): class DreiSatIE(ZDFIE):
IE_NAME = '3sat' IE_NAME = '3sat'
_VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _VALID_URL = r'(?:https?://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php|mediathek\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',

View File

@ -15,7 +15,7 @@ class DVTVIE(InfoExtractor):
IE_NAME = 'dvtv' IE_NAME = 'dvtv'
IE_DESC = 'http://video.aktualne.cz/' IE_DESC = 'http://video.aktualne.cz/'
_VALID_URL = r'http://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})' _VALID_URL = r'https?://video\.aktualne\.cz/(?:[^/]+/)+r~(?P<id>[0-9a-f]{32})'
_TESTS = [{ _TESTS = [{
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/', 'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',

View File

@ -7,7 +7,7 @@ from .common import InfoExtractor
class EchoMskIE(InfoExtractor): class EchoMskIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?echo\.msk\.ru/sounds/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.echo.msk.ru/sounds/1464134.html', 'url': 'http://www.echo.msk.ru/sounds/1464134.html',
'md5': '2e44b3b78daff5b458e4dbc37f191f7c', 'md5': '2e44b3b78daff5b458e4dbc37f191f7c',

View File

@ -8,7 +8,7 @@ from .common import InfoExtractor
class ExfmIE(InfoExtractor): class ExfmIE(InfoExtractor):
IE_NAME = 'exfm' IE_NAME = 'exfm'
IE_DESC = 'ex.fm' IE_DESC = 'ex.fm'
_VALID_URL = r'http://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?ex\.fm/song/(?P<id>[^/]+)'
_SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream' _SOUNDCLOUD_URL = r'http://(?:www\.)?api\.soundcloud\.com/tracks/([^/]+)/stream'
_TESTS = [ _TESTS = [
{ {

View File

@ -17,7 +17,7 @@ from ..utils import (
class FC2IE(InfoExtractor): class FC2IE(InfoExtractor):
_VALID_URL = r'^http://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)' _VALID_URL = r'^https?://video\.fc2\.com/(?:[^/]+/)*content/(?P<id>[^/]+)'
IE_NAME = 'fc2' IE_NAME = 'fc2'
_NETRC_MACHINE = 'fc2' _NETRC_MACHINE = 'fc2'
_TESTS = [{ _TESTS = [{

View File

@ -4,7 +4,7 @@ from .common import InfoExtractor
class FirstpostIE(InfoExtractor): class FirstpostIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', 'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',

View File

@ -8,7 +8,7 @@ from ..utils import int_or_none
class FirstTVIE(InfoExtractor): class FirstTVIE(InfoExtractor):
IE_NAME = '1tv' IE_NAME = '1tv'
IE_DESC = 'Первый канал' IE_DESC = 'Первый канал'
_VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' _VALID_URL = r'https?://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.1tv.ru/videoarchive/73390', 'url': 'http://www.1tv.ru/videoarchive/73390',

View File

@ -10,7 +10,7 @@ from ..utils import (
class FKTVIE(InfoExtractor): class FKTVIE(InfoExtractor):
IE_NAME = 'fernsehkritik.tv' IE_NAME = 'fernsehkritik.tv'
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?' _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
_TEST = { _TEST = {
'url': 'http://fernsehkritik.tv/folge-1', 'url': 'http://fernsehkritik.tv/folge-1',

View File

@ -5,7 +5,7 @@ from .common import InfoExtractor
class FootyRoomIE(InfoExtractor): class FootyRoomIE(InfoExtractor):
_VALID_URL = r'http://footyroom\.com/(?P<id>[^/]+)' _VALID_URL = r'https?://footyroom\.com/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/', 'url': 'http://footyroom.com/schalke-04-0-2-real-madrid-2015-02/',
'info_dict': { 'info_dict': {

View File

@ -4,7 +4,7 @@ from .common import InfoExtractor
class FoxgayIE(InfoExtractor): class FoxgayIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml' _VALID_URL = r'https?://(?:www\.)?foxgay\.com/videos/(?:\S+-)?(?P<id>\d+)\.shtml'
_TEST = { _TEST = {
'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml', 'url': 'http://foxgay.com/videos/fuck-turkish-style-2582.shtml',
'md5': '80d72beab5d04e1655a56ad37afe6841', 'md5': '80d72beab5d04e1655a56ad37afe6841',

View File

@ -6,7 +6,7 @@ from ..utils import int_or_none
class FranceInterIE(InfoExtractor): class FranceInterIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://www.franceinter.fr/player/reecouter?play=793962', 'url': 'http://www.franceinter.fr/player/reecouter?play=793962',
'md5': '4764932e466e6f6c79c317d2e74f6884', 'md5': '4764932e466e6f6c79c317d2e74f6884',

View File

@ -60,28 +60,31 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
video_id, 'Downloading f4m manifest token', fatal=False) video_id, 'Downloading f4m manifest token', fatal=False)
if f4m_url: if f4m_url:
formats.extend(self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, 1, format_id)) f4m_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
video_id, f4m_id=format_id, fatal=False))
elif ext == 'm3u8': elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id=format_id, fatal=False))
elif video_url.startswith('rtmp'): elif video_url.startswith('rtmp'):
formats.append({ formats.append({
'url': video_url, 'url': video_url,
'format_id': 'rtmp-%s' % format_id, 'format_id': 'rtmp-%s' % format_id,
'ext': 'flv', 'ext': 'flv',
'preference': 1,
}) })
else: else:
formats.append({ if self._is_valid_url(video_url, video_id, format_id):
'url': video_url, formats.append({
'format_id': format_id, 'url': video_url,
'preference': -1, 'format_id': format_id,
}) })
self._sort_formats(formats) self._sort_formats(formats)
title = info['titre'] title = info['titre']
subtitle = info.get('sous_titre') subtitle = info.get('sous_titre')
if subtitle: if subtitle:
title += ' - %s' % subtitle title += ' - %s' % subtitle
title = title.strip()
subtitles = {} subtitles = {}
subtitles_list = [{ subtitles_list = [{
@ -125,13 +128,13 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr' IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html' _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
'info_dict': { 'info_dict': {
'id': '84981923', 'id': '84981923',
'ext': 'flv', 'ext': 'mp4',
'title': 'Soir 3', 'title': 'Soir 3',
'upload_date': '20130826', 'upload_date': '20130826',
'timestamp': 1377548400, 'timestamp': 1377548400,
@ -139,6 +142,10 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
'fr': 'mincount:2', 'fr': 'mincount:2',
}, },
}, },
'params': {
# m3u8 downloads
'skip_download': True,
},
}, { }, {
'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
'info_dict': { 'info_dict': {
@ -155,11 +162,32 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html', 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
'md5': 'f485bda6e185e7d15dbc69b72bae993e', 'md5': 'f485bda6e185e7d15dbc69b72bae993e',
'info_dict': { 'info_dict': {
'id': '556e03339473995ee145930c', 'id': 'NI_173343',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Les entreprises familiales : le secret de la réussite', 'title': 'Les entreprises familiales : le secret de la réussite',
'thumbnail': 're:^https?://.*\.jpe?g$', 'thumbnail': 're:^https?://.*\.jpe?g$',
} 'timestamp': 1433273139,
'upload_date': '20150602',
},
'params': {
# m3u8 downloads
'skip_download': True,
},
}, {
'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
'md5': 'f485bda6e185e7d15dbc69b72bae993e',
'info_dict': {
'id': 'NI_657393',
'ext': 'mp4',
'title': 'Olivier Monthus, réalisateur de "Bretagne, le choix de l’Armor"',
'description': 'md5:a3264114c9d29aeca11ced113c37b16c',
'thumbnail': 're:^https?://.*\.jpe?g$',
'timestamp': 1458300695,
'upload_date': '20160318',
},
'params': {
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -172,7 +200,9 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
return self.url_result(dmcloud_url, 'DailymotionCloud') return self.url_result(dmcloud_url, 'DailymotionCloud')
video_id, catalogue = self._search_regex( video_id, catalogue = self._search_regex(
r'id-video=([^@]+@[^"]+)', webpage, 'video id').split('@') (r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
webpage, 'video id').split('@')
return self._extract_video(video_id, catalogue) return self._extract_video(video_id, catalogue)

View File

@ -5,7 +5,7 @@ from ..utils import ExtractorError
class FreeVideoIE(InfoExtractor): class FreeVideoIE(InfoExtractor):
_VALID_URL = r'^http://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])' _VALID_URL = r'^https?://www.freevideo.cz/vase-videa/(?P<id>[^.]+)\.html(?:$|[?#])'
_TEST = { _TEST = {
'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html', 'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',

View File

@ -10,7 +10,7 @@ from .youtube import YoutubeIE
class GamekingsIE(InfoExtractor): class GamekingsIE(InfoExtractor):
_VALID_URL = r'http://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)' _VALID_URL = r'https?://www\.gamekings\.nl/(?:videos|nieuws)/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
# YouTube embed video # YouTube embed video
'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/', 'url': 'http://www.gamekings.nl/videos/phoenix-wright-ace-attorney-dual-destinies-review/',

View File

@ -14,7 +14,7 @@ from ..utils import (
class GameSpotIE(InfoExtractor): class GameSpotIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', 'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825', 'md5': 'b2a30deaa8654fcccd43713a6b6a4825',

View File

@ -13,7 +13,7 @@ from ..utils import (
class GameStarIE(InfoExtractor): class GameStarIE(InfoExtractor):
_VALID_URL = r'http://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html'
_TEST = { _TEST = {
'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html',
'md5': '96974ecbb7fd8d0d20fca5a00810cea7', 'md5': '96974ecbb7fd8d0d20fca5a00810cea7',

View File

@ -9,7 +9,7 @@ from ..utils import (
class GametrailersIE(InfoExtractor): class GametrailersIE(InfoExtractor):
_VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)' _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'
_TEST = { _TEST = {
'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review', 'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',

View File

@ -59,6 +59,7 @@ from .videomore import VideomoreIE
from .googledrive import GoogleDriveIE from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE from .digiteka import DigitekaIE
from .instagram import InstagramIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -239,6 +240,35 @@ class GenericIE(InfoExtractor):
'format': 'bestvideo', 'format': 'bestvideo',
}, },
}, },
# m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
{
'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
'info_dict': {
'id': 'content',
'ext': 'mp4',
'title': 'content',
'formats': 'mincount:8',
},
'params': {
# m3u8 downloads
'skip_download': True,
}
},
# m3u8 served with Content-Type: text/plain
{
'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
'info_dict': {
'id': 'index',
'ext': 'mp4',
'title': 'index',
'upload_date': '20140720',
'formats': 'mincount:11',
},
'params': {
# m3u8 downloads
'skip_download': True,
}
},
# google redirect # google redirect
{ {
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@ -1245,14 +1275,13 @@ class GenericIE(InfoExtractor):
info_dict = { info_dict = {
'id': video_id, 'id': video_id,
'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
} }
# Check for direct link to a video # Check for direct link to a video
content_type = head_response.headers.get('Content-Type', '') content_type = head_response.headers.get('Content-Type', '').lower()
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>.+)$', content_type) m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
if m: if m:
upload_date = unified_strdate(
head_response.headers.get('Last-Modified'))
format_id = m.group('format_id') format_id = m.group('format_id')
if format_id.endswith('mpegurl'): if format_id.endswith('mpegurl'):
formats = self._extract_m3u8_formats(url, video_id, 'mp4') formats = self._extract_m3u8_formats(url, video_id, 'mp4')
@ -1264,11 +1293,8 @@ class GenericIE(InfoExtractor):
'url': url, 'url': url,
'vcodec': 'none' if m.group('type') == 'audio' else None 'vcodec': 'none' if m.group('type') == 'audio' else None
}] }]
info_dict.update({ info_dict['direct'] = True
'direct': True, info_dict['formats'] = formats
'formats': formats,
'upload_date': upload_date,
})
return info_dict return info_dict
if not self._downloader.params.get('test', False) and not is_intentional: if not self._downloader.params.get('test', False) and not is_intentional:
@ -1289,18 +1315,21 @@ class GenericIE(InfoExtractor):
request.add_header('Accept-Encoding', '*') request.add_header('Accept-Encoding', '*')
full_response = self._request_webpage(request, video_id) full_response = self._request_webpage(request, video_id)
first_bytes = full_response.read(512)
# Is it an M3U playlist?
if first_bytes.startswith(b'#EXTM3U'):
info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
return info_dict
# Maybe it's a direct link to a video? # Maybe it's a direct link to a video?
# Be careful not to download the whole thing! # Be careful not to download the whole thing!
first_bytes = full_response.read(512)
if not is_html(first_bytes): if not is_html(first_bytes):
self._downloader.report_warning( self._downloader.report_warning(
'URL could be a direct video link, returning it as such.') 'URL could be a direct video link, returning it as such.')
upload_date = unified_strdate(
head_response.headers.get('Last-Modified'))
info_dict.update({ info_dict.update({
'direct': True, 'direct': True,
'url': url, 'url': url,
'upload_date': upload_date,
}) })
return info_dict return info_dict
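
# Quick check (illustrative only) that the tightened Content-Type regex above
# still matches an M3U8 playlist served with a charset parameter and stops
# the format_id at the ';'.
import re

content_type = 'audio/x-mpegURL; charset=utf-8'.lower()
m = re.match(
    r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)',
    content_type)
print(m.group('type'), m.group('format_id'))  # -> audio x-mpegurl
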
@ -1881,6 +1910,19 @@ class GenericIE(InfoExtractor):
self._proto_relative_url(unescapeHTML(mobj.group(1))), self._proto_relative_url(unescapeHTML(mobj.group(1))),
'AdobeTVVideo') 'AdobeTVVideo')
# Look for Vine embeds
mobj = re.search(
r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
webpage)
if mobj is not None:
return self.url_result(
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
# Look for Instagram embeds
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
if instagram_embed_url is not None:
return self.url_result(instagram_embed_url, InstagramIE.ie_key())
def check_video(vurl): def check_video(vurl):
if YoutubeIE.suitable(vurl): if YoutubeIE.suitable(vurl):
return True return True

youtube_dl/extractor/hbo.py Normal file (122 lines)
View File

@ -0,0 +1,122 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
xpath_text,
xpath_element,
int_or_none,
parse_duration,
)
class HBOIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
'md5': '1c33253f0c7782142c993c0ba62a8753',
'info_dict': {
'id': '1437839',
'ext': 'mp4',
'title': 'Ep. 64 Clip: Encryption',
}
}
_FORMATS_INFO = {
'1920': {
'width': 1280,
'height': 720,
},
'640': {
'width': 768,
'height': 432,
},
'highwifi': {
'width': 640,
'height': 360,
},
'high3g': {
'width': 640,
'height': 360,
},
'medwifi': {
'width': 400,
'height': 224,
},
'med3g': {
'width': 400,
'height': 224,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_xml(
'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
title = xpath_text(video_data, 'title', 'title', True)
formats = []
for source in xpath_element(video_data, 'videos', 'sources', True):
if source.tag == 'size':
path = xpath_text(source, './/path')
if not path:
continue
width = source.attrib.get('width')
format_info = self._FORMATS_INFO.get(width, {})
height = format_info.get('height')
fmt = {
'url': path,
'format_id': 'http%s' % ('-%dp' % height if height else ''),
'width': format_info.get('width'),
'height': height,
}
rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
if rtmp:
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': fmt['format_id'].replace('http', 'rtmp'),
})
formats.append(fmt)
else:
video_url = source.text
if not video_url:
continue
if source.tag == 'tarball':
formats.extend(self._extract_m3u8_formats(
video_url.replace('.tar', '/base_index_w8.m3u8'),
video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
else:
format_info = self._FORMATS_INFO.get(source.tag, {})
formats.append({
'format_id': 'http-%s' % source.tag,
'url': video_url,
'width': format_info.get('width'),
'height': format_info.get('height'),
})
self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
thumbnails = []
card_sizes = xpath_element(video_data, 'titleCardSizes')
if card_sizes is not None:
for size in card_sizes:
path = xpath_text(size, 'path')
if not path:
continue
width = int_or_none(size.get('width'))
thumbnails.append({
'id': width,
'url': path,
'width': width,
})
return {
'id': video_id,
'title': title,
'duration': parse_duration(xpath_text(video_data, 'duration/tv14')),
'formats': formats,
'thumbnails': thumbnails,
}
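
# Illustration (made-up sample path) of the RTMP split used above: a single
# <path> value is separated into connection URL, app and playpath.
import re

path = 'rtmpe://cp123.edgefcs.net/ondemand/mp4:hbo/videos/ep64_clip.mp4'
rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
print(rtmp.group('url'))       # -> rtmpe://cp123.edgefcs.net/ondemand
print(rtmp.group('app'))       # -> ondemand
print(rtmp.group('playpath'))  # -> mp4:hbo/videos/ep64_clip.mp4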

View File

@ -12,7 +12,7 @@ from ..utils import (
class HotNewHipHopIE(InfoExtractor): class HotNewHipHopIE(InfoExtractor):
_VALID_URL = r'http://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
_TEST = { _TEST = {
'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96', 'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',

View File

@ -12,7 +12,7 @@ from ..utils import (
class HypemIE(InfoExtractor): class HypemIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/' _VALID_URL = r'https?://(?:www\.)?hypem\.com/track/(?P<id>[^/]+)/'
_TEST = { _TEST = {
'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME',
'md5': 'b9cc91b5af8995e9f0c1cee04c575828', 'md5': 'b9cc91b5af8995e9f0c1cee04c575828',

View File

@ -12,7 +12,7 @@ from ..utils import (
class ImdbIE(InfoExtractor): class ImdbIE(InfoExtractor):
IE_NAME = 'imdb' IE_NAME = 'imdb'
IE_DESC = 'Internet Movie Database trailers' IE_DESC = 'Internet Movie Database trailers'
_VALID_URL = r'http://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)' _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/imdb/vi(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'url': 'http://www.imdb.com/video/imdb/vi2524815897',
@ -70,7 +70,7 @@ class ImdbIE(InfoExtractor):
class ImdbListIE(InfoExtractor): class ImdbListIE(InfoExtractor):
IE_NAME = 'imdb:list' IE_NAME = 'imdb:list'
IE_DESC = 'Internet Movie Database lists' IE_DESC = 'Internet Movie Database lists'
_VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'
_TEST = { _TEST = {
'url': 'http://www.imdb.com/list/JFs9NWw6XI0', 'url': 'http://www.imdb.com/list/JFs9NWw6XI0',
'info_dict': { 'info_dict': {

View File

@ -4,8 +4,10 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
get_element_by_attribute,
int_or_none, int_or_none,
limit_length, limit_length,
lowercase_escape,
) )
@ -38,6 +40,18 @@ class InstagramIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@staticmethod
def _extract_embed_url(webpage):
blockquote_el = get_element_by_attribute(
'class', 'instagram-media', webpage)
if blockquote_el is None:
return
mobj = re.search(
r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
if mobj:
return mobj.group('link')
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -46,6 +60,8 @@ class InstagramIE(InfoExtractor):
webpage, 'uploader id', fatal=False) webpage, 'uploader id', fatal=False)
desc = self._search_regex( desc = self._search_regex(
r'"caption":"(.+?)"', webpage, 'description', default=None) r'"caption":"(.+?)"', webpage, 'description', default=None)
if desc is not None:
desc = lowercase_escape(desc)
return { return {
'id': video_id, 'id': video_id,
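
# A hedged sketch of the new _extract_embed_url() above, run on a minimal,
# made-up instagram-media blockquote.
import re
from youtube_dl.utils import get_element_by_attribute

webpage = ('<blockquote class="instagram-media">'
           '<a href="https://www.instagram.com/p/xxxxxxxxxxx/">A post</a>'
           '</blockquote>')
blockquote_el = get_element_by_attribute('class', 'instagram-media', webpage)
mobj = re.search(r'<a[^>]+href=([\'"])(?P<link>[^\'"]+)\1', blockquote_el)
print(mobj.group('link'))  # -> https://www.instagram.com/p/xxxxxxxxxxx/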

View File

@ -1,4 +1,4 @@
# -*- coding: utf-8 -*- # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
@ -6,6 +6,8 @@ import time
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
js_to_json,
sanitized_Request, sanitized_Request,
) )
@ -30,8 +32,7 @@ class IPrimaIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
@ -43,9 +44,42 @@ class IPrimaIE(InfoExtractor):
req.add_header('Referer', url) req.add_header('Referer', url)
playerpage = self._download_webpage(req, video_id, note='Downloading player') playerpage = self._download_webpage(req, video_id, note='Downloading player')
m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url') formats = []
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') def extract_formats(format_url, format_key=None, lang=None):
ext = determine_ext(format_url)
new_formats = []
if format_key == 'hls' or ext == 'm3u8':
new_formats = self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
elif format_key == 'dash' or ext == 'mpd':
return
new_formats = self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False)
if lang:
for f in new_formats:
if not f.get('language'):
f['language'] = lang
formats.extend(new_formats)
options = self._parse_json(
self._search_regex(
r'(?s)var\s+playerOptions\s*=\s*({.+?});',
playerpage, 'player options', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if options:
for key, tracks in options.get('tracks', {}).items():
if not isinstance(tracks, list):
continue
for track in tracks:
src = track.get('src')
if src:
extract_formats(src, key.lower(), track.get('lang'))
if not formats:
for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
extract_formats(src)
self._sort_formats(formats) self._sort_formats(formats)
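
# Hedged sketch (the blob below is invented) of the playerOptions parsing
# above: js_to_json makes the JavaScript object literal JSON-parseable, after
# which the tracks dict can be walked as in extract_formats().
import json
from youtube_dl.utils import js_to_json

player_options = "{tracks: {HLS: [{src: 'http://example.com/stream.m3u8', lang: 'cs'}]}}"
options = json.loads(js_to_json(player_options))
print(options['tracks']['HLS'][0]['src'])  # -> http://example.com/stream.m3u8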

View File

@ -165,7 +165,7 @@ class IqiyiIE(InfoExtractor):
IE_NAME = 'iqiyi' IE_NAME = 'iqiyi'
IE_DESC = '爱奇艺' IE_DESC = '爱奇艺'
_VALID_URL = r'http://(?:[^.]+\.)?iqiyi\.com/.+\.html' _VALID_URL = r'https?://(?:[^.]+\.)?iqiyi\.com/.+\.html'
_NETRC_MACHINE = 'iqiyi' _NETRC_MACHINE = 'iqiyi'

View File

@ -9,7 +9,7 @@ from .youtube import YoutubeIE
class JadoreCettePubIE(InfoExtractor): class JadoreCettePubIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html' _VALID_URL = r'https?://(?:www\.)?jadorecettepub\.com/[0-9]{4}/[0-9]{2}/(?P<id>.*?)\.html'
_TEST = { _TEST = {
'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html', 'url': 'http://www.jadorecettepub.com/2010/12/star-wars-massacre-par-les-japonais.html',

View File

@ -8,7 +8,7 @@ from .common import InfoExtractor
class JeuxVideoIE(InfoExtractor): class JeuxVideoIE(InfoExtractor):
_VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm' _VALID_URL = r'https?://.*?\.jeuxvideo\.com/.*/(.*?)\.htm'
_TESTS = [{ _TESTS = [{
'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm',

View File

@ -9,7 +9,7 @@ from ..utils import (
class KaraoketvIE(InfoExtractor): class KaraoketvIE(InfoExtractor):
_VALID_URL = r'http://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)' _VALID_URL = r'https?://karaoketv\.co\.il/\?container=songs&id=(?P<id>[0-9]+)'
_TEST = { _TEST = {
'url': 'http://karaoketv.co.il/?container=songs&id=171568', 'url': 'http://karaoketv.co.il/?container=songs&id=171568',
'info_dict': { 'info_dict': {

View File

@ -12,7 +12,7 @@ from ..utils import (
class KarriereVideosIE(InfoExtractor): class KarriereVideosIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)' _VALID_URL = r'https?://(?:www\.)?karrierevideos\.at(?:/[^/]+)+/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin', 'url': 'http://www.karrierevideos.at/berufsvideos/mittlere-hoehere-schulen/altenpflegerin',
'info_dict': { 'info_dict': {

View File

@ -13,7 +13,7 @@ from ..utils import (
class KontrTubeIE(InfoExtractor): class KontrTubeIE(InfoExtractor):
IE_NAME = 'kontrtube' IE_NAME = 'kontrtube'
IE_DESC = 'KontrTube.ru - Труба зовёт' IE_DESC = 'KontrTube.ru - Труба зовёт'
_VALID_URL = r'http://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/' _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
_TEST = { _TEST = {
'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/', 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',

View File

@ -4,7 +4,7 @@ from .common import InfoExtractor
class Ku6IE(InfoExtractor): class Ku6IE(InfoExtractor):
_VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' _VALID_URL = r'https?://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html'
_TEST = { _TEST = {
'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html', 'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html',
'md5': '01203549b9efbb45f4b87d55bdea1ed1', 'md5': '01203549b9efbb45f4b87d55bdea1ed1',

View File

@ -16,7 +16,7 @@ from ..utils import (
class KUSIIE(InfoExtractor): class KUSIIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))' _VALID_URL = r'https?://(?:www\.)?kusi\.com/(?P<path>story/.+|video\?clipId=(?P<clipId>\d+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold', 'url': 'http://www.kusi.com/story/31183873/turko-files-case-closed-put-on-hold',
'md5': 'f926e7684294cf8cb7bdf8858e1b3988', 'md5': 'f926e7684294cf8cb7bdf8858e1b3988',

View File

@ -2,13 +2,13 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
get_element_by_id, get_element_by_id,
clean_html, clean_html,
ExtractorError, ExtractorError,
InAdvancePagedList,
remove_start, remove_start,
) )
@ -23,7 +23,7 @@ class KuwoBaseIE(InfoExtractor):
{'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10} {'format': 'aac', 'ext': 'aac', 'abr': 48, 'preference': 10}
] ]
def _get_formats(self, song_id): def _get_formats(self, song_id, tolerate_ip_deny=False):
formats = [] formats = []
for file_format in self._FORMATS: for file_format in self._FORMATS:
song_url = self._download_webpage( song_url = self._download_webpage(
@ -32,7 +32,7 @@ class KuwoBaseIE(InfoExtractor):
song_id, note='Download %s url info' % file_format['format'], song_id, note='Download %s url info' % file_format['format'],
) )
if song_url == 'IPDeny': if song_url == 'IPDeny' and not tolerate_ip_deny:
raise ExtractorError('This song is blocked in this region', expected=True) raise ExtractorError('This song is blocked in this region', expected=True)
if song_url.startswith('http://') or song_url.startswith('https://'): if song_url.startswith('http://') or song_url.startswith('https://'):
@ -43,14 +43,19 @@ class KuwoBaseIE(InfoExtractor):
'preference': file_format['preference'], 'preference': file_format['preference'],
'abr': file_format.get('abr'), 'abr': file_format.get('abr'),
}) })
self._sort_formats(formats)
# XXX: _sort_formats fails if there are no formats, which is not the
# desired behavior when 'IPDeny' is tolerated.
# This check can be removed once https://github.com/rg3/youtube-dl/pull/8051 is merged.
if not tolerate_ip_deny:
self._sort_formats(formats)
return formats return formats
class KuwoIE(KuwoBaseIE): class KuwoIE(KuwoBaseIE):
IE_NAME = 'kuwo:song' IE_NAME = 'kuwo:song'
IE_DESC = '酷我音乐' IE_DESC = '酷我音乐'
_VALID_URL = r'http://www\.kuwo\.cn/yinyue/(?P<id>\d+?)/' _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+?)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.kuwo.cn/yinyue/635632/', 'url': 'http://www.kuwo.cn/yinyue/635632/',
'info_dict': { 'info_dict': {
@ -75,6 +80,9 @@ class KuwoIE(KuwoBaseIE):
'params': { 'params': {
'format': 'mp3-320' 'format': 'mp3-320'
}, },
}, {
'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -126,7 +134,7 @@ class KuwoIE(KuwoBaseIE):
class KuwoAlbumIE(InfoExtractor): class KuwoAlbumIE(InfoExtractor):
IE_NAME = 'kuwo:album' IE_NAME = 'kuwo:album'
IE_DESC = '酷我音乐 - 专辑' IE_DESC = '酷我音乐 - 专辑'
_VALID_URL = r'http://www\.kuwo\.cn/album/(?P<id>\d+?)/' _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/'
_TEST = { _TEST = {
'url': 'http://www.kuwo.cn/album/502294/', 'url': 'http://www.kuwo.cn/album/502294/',
'info_dict': { 'info_dict': {
@ -162,13 +170,11 @@ class KuwoAlbumIE(InfoExtractor):
class KuwoChartIE(InfoExtractor): class KuwoChartIE(InfoExtractor):
IE_NAME = 'kuwo:chart' IE_NAME = 'kuwo:chart'
IE_DESC = '酷我音乐 - 排行榜' IE_DESC = '酷我音乐 - 排行榜'
_VALID_URL = r'http://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm' _VALID_URL = r'https?://yinyue\.kuwo\.cn/billboard_(?P<id>[^.]+).htm'
_TEST = { _TEST = {
'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm', 'url': 'http://yinyue.kuwo.cn/billboard_香港中文龙虎榜.htm',
'info_dict': { 'info_dict': {
'id': '香港中文龙虎榜', 'id': '香港中文龙虎榜',
'title': '香港中文龙虎榜',
'description': 're:\d{4}第\d{2}期',
}, },
'playlist_mincount': 10, 'playlist_mincount': 10,
} }
@ -179,30 +185,24 @@ class KuwoChartIE(InfoExtractor):
url, chart_id, note='Download chart info', url, chart_id, note='Download chart info',
errnote='Unable to get chart info') errnote='Unable to get chart info')
chart_name = self._html_search_regex(
r'<h1[^>]+class="unDis">([^<]+)</h1>', webpage, 'chart name')
chart_desc = self._html_search_regex(
r'<p[^>]+class="tabDef">(\d{4}第\d{2}期)</p>', webpage, 'chart desc')
entries = [ entries = [
self.url_result(song_url, 'Kuwo') for song_url in re.findall( self.url_result(song_url, 'Kuwo') for song_url in re.findall(
r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/"', webpage) r'<a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)', webpage)
] ]
return self.playlist_result(entries, chart_id, chart_name, chart_desc) return self.playlist_result(entries, chart_id)
class KuwoSingerIE(InfoExtractor): class KuwoSingerIE(InfoExtractor):
IE_NAME = 'kuwo:singer' IE_NAME = 'kuwo:singer'
IE_DESC = '酷我音乐 - 歌手' IE_DESC = '酷我音乐 - 歌手'
_VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.kuwo.cn/mingxing/bruno+mars/', 'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
'info_dict': { 'info_dict': {
'id': 'bruno+mars', 'id': 'bruno+mars',
'title': 'Bruno Mars', 'title': 'Bruno Mars',
}, },
'playlist_count': 10, 'playlist_mincount': 329,
}, { }, {
'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm', 'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
'info_dict': { 'info_dict': {
@ -213,6 +213,8 @@ class KuwoSingerIE(InfoExtractor):
'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540 'skip': 'Regularly stalls travis build', # See https://travis-ci.org/rg3/youtube-dl/jobs/78878540
}] }]
PAGE_SIZE = 15
def _real_extract(self, url): def _real_extract(self, url):
singer_id = self._match_id(url) singer_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(
@ -220,25 +222,28 @@ class KuwoSingerIE(InfoExtractor):
errnote='Unable to get singer info') errnote='Unable to get singer info')
singer_name = self._html_search_regex( singer_name = self._html_search_regex(
r'<div class="title clearfix">\s*<h1>([^<]+)<span', webpage, 'singer name' r'<h1>([^<]+)</h1>', webpage, 'singer name')
)
entries = [] artist_id = self._html_search_regex(
first_page_only = False if re.search(r'/music(?:_\d+)?\.htm', url) else True r'data-artistid="(\d+)"', webpage, 'artist id')
for page_num in itertools.count(1):
page_count = int(self._html_search_regex(
r'data-page="(\d+)"', webpage, 'page count'))
def page_func(page_num):
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num), 'http://www.kuwo.cn/artist/contentMusicsAjax',
singer_id, note='Download song list page #%d' % page_num, singer_id, note='Download song list page #%d' % (page_num + 1),
errnote='Unable to get song list page #%d' % page_num) errnote='Unable to get song list page #%d' % (page_num + 1),
query={'artistId': artist_id, 'pn': page_num, 'rn': self.PAGE_SIZE})
entries.extend([ return [
self.url_result(song_url, 'Kuwo') for song_url in re.findall( self.url_result(song_url, 'Kuwo') for song_url in re.findall(
r'<p[^>]+class="m_name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)/', r'<div[^>]+class="name"><a[^>]+href="(http://www\.kuwo\.cn/yinyue/\d+)',
webpage) webpage)
][:10 if first_page_only else None]) ]
if first_page_only or not re.search(r'<a[^>]+href="[^"]+">下一页</a>', webpage): entries = InAdvancePagedList(page_func, page_count, self.PAGE_SIZE)
break
return self.playlist_result(entries, singer_id, singer_name) return self.playlist_result(entries, singer_id, singer_name)
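
# Hedged toy example of the InAdvancePagedList introduced above: pages are
# built lazily, one page_func call per PAGE_SIZE-sized page.
from youtube_dl.utils import InAdvancePagedList

PAGE_SIZE = 15
page_count = 3

def page_func(page_num):
    return ['song-%d' % i for i in range(page_num * PAGE_SIZE, (page_num + 1) * PAGE_SIZE)]

entries = InAdvancePagedList(page_func, page_count, PAGE_SIZE)
print(entries.getslice(0, 5))  # only page 0 is built
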
@ -246,7 +251,7 @@ class KuwoSingerIE(InfoExtractor):
class KuwoCategoryIE(InfoExtractor): class KuwoCategoryIE(InfoExtractor):
IE_NAME = 'kuwo:category' IE_NAME = 'kuwo:category'
IE_DESC = '酷我音乐 - 分类' IE_DESC = '酷我音乐 - 分类'
_VALID_URL = r'http://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm' _VALID_URL = r'https?://yinyue\.kuwo\.cn/yy/cinfo_(?P<id>\d+?).htm'
_TEST = { _TEST = {
'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm', 'url': 'http://yinyue.kuwo.cn/yy/cinfo_86375.htm',
'info_dict': { 'info_dict': {
@ -283,15 +288,21 @@ class KuwoCategoryIE(InfoExtractor):
class KuwoMvIE(KuwoBaseIE): class KuwoMvIE(KuwoBaseIE):
IE_NAME = 'kuwo:mv' IE_NAME = 'kuwo:mv'
IE_DESC = '酷我音乐 - MV' IE_DESC = '酷我音乐 - MV'
_VALID_URL = r'http://www\.kuwo\.cn/mv/(?P<id>\d+?)/' _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/'
_TEST = { _TEST = {
'url': 'http://www.kuwo.cn/mv/6480076/', 'url': 'http://www.kuwo.cn/mv/6480076/',
'info_dict': { 'info_dict': {
'id': '6480076', 'id': '6480076',
'ext': 'mkv', 'ext': 'mp4',
'title': '我们家MV', 'title': 'My HouseMV',
'creator': '2PM', 'creator': '2PM',
}, },
# In this video, the music URLs (anti.s) are blocked outside China and
# the USA, while the MV URL (mvurl) is available globally, so force the
# MV URL for consistent results across countries
'params': {
'format': 'mv',
},
} }
_FORMATS = KuwoBaseIE._FORMATS + [ _FORMATS = KuwoBaseIE._FORMATS + [
{'format': 'mkv', 'ext': 'mkv', 'preference': 250}, {'format': 'mkv', 'ext': 'mkv', 'preference': 250},
@ -313,7 +324,17 @@ class KuwoMvIE(KuwoBaseIE):
else: else:
raise ExtractorError('Unable to find song or singer names') raise ExtractorError('Unable to find song or singer names')
formats = self._get_formats(song_id) formats = self._get_formats(song_id, tolerate_ip_deny=True)
mv_url = self._download_webpage(
'http://www.kuwo.cn/yy/st/mvurl?rid=MUSIC_%s' % song_id,
song_id, note='Download %s MV URL' % song_id)
formats.append({
'url': mv_url,
'format_id': 'mv',
})
self._sort_formats(formats)
return { return {
'id': song_id, 'id': song_id,

View File

@ -19,7 +19,7 @@ from ..utils import (
class Laola1TvIE(InfoExtractor): class Laola1TvIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/[^/]+/(?P<slug>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/(?P<kind>[^/]+)/(?P<slug>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': { 'info_dict': {
@ -33,7 +33,7 @@ class Laola1TvIE(InfoExtractor):
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} },
}, { }, {
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie', 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie',
'info_dict': { 'info_dict': {
@ -47,12 +47,28 @@ class Laola1TvIE(InfoExtractor):
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} },
}, {
'url': 'http://www.laola1.tv/de-de/livestream/2016-03-22-belogorie-belgorod-trentino-diatec-lde',
'info_dict': {
'id': '487850',
'display_id': '2016-03-22-belogorie-belgorod-trentino-diatec-lde',
'ext': 'flv',
'title': 'Belogorie BELGOROD - TRENTINO Diatec',
'upload_date': '20160322',
'uploader': 'CEV - Europäischer Volleyball Verband',
'is_live': True,
'categories': ['Volleyball'],
},
'params': {
'skip_download': True,
},
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('slug') display_id = mobj.group('slug')
kind = mobj.group('kind')
lang = mobj.group('lang') lang = mobj.group('lang')
portal = mobj.group('portal') portal = mobj.group('portal')
@ -85,12 +101,17 @@ class Laola1TvIE(InfoExtractor):
_v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k) _v = lambda x, **k: xpath_text(hd_doc, './/video/' + x, **k)
title = _v('title', fatal=True) title = _v('title', fatal=True)
VS_TARGETS = {
'video': '2',
'livestream': '17',
}
req = sanitized_Request( req = sanitized_Request(
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' % 'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access?%s' %
compat_urllib_parse.urlencode({ compat_urllib_parse.urlencode({
'videoId': video_id, 'videoId': video_id,
'target': '2', 'target': VS_TARGETS.get(kind, '2'),
'label': 'laola1tv', 'label': _v('label'),
'area': _v('area'), 'area': _v('area'),
}), }),
urlencode_postdata( urlencode_postdata(

View File

@ -28,7 +28,7 @@ from ..utils import (
class LeIE(InfoExtractor): class LeIE(InfoExtractor):
IE_DESC = '乐视网' IE_DESC = '乐视网'
_VALID_URL = r'http://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html' _VALID_URL = r'https?://www\.le\.com/ptv/vplay/(?P<id>\d+)\.html'
_URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html' _URL_TEMPLATE = 'http://www.le.com/ptv/vplay/%s.html'
@ -196,7 +196,7 @@ class LeIE(InfoExtractor):
class LePlaylistIE(InfoExtractor): class LePlaylistIE(InfoExtractor):
_VALID_URL = r'http://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)' _VALID_URL = r'https?://[a-z]+\.le\.com/[a-z]+/(?P<id>[a-z0-9_]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.le.com/tv/46177.html', 'url': 'http://www.le.com/tv/46177.html',

View File

@ -17,7 +17,7 @@ from ..utils import (
class LifeNewsIE(InfoExtractor): class LifeNewsIE(InfoExtractor):
IE_NAME = 'lifenews' IE_NAME = 'lifenews'
IE_DESC = 'LIFE | NEWS' IE_DESC = 'LIFE | NEWS'
_VALID_URL = r'http://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)' _VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# single video embedded via video/source # single video embedded via video/source
@ -159,7 +159,7 @@ class LifeNewsIE(InfoExtractor):
class LifeEmbedIE(InfoExtractor): class LifeEmbedIE(InfoExtractor):
IE_NAME = 'life:embed' IE_NAME = 'life:embed'
_VALID_URL = r'http://embed\.life\.ru/embed/(?P<id>[\da-f]{32})' _VALID_URL = r'https?://embed\.life\.ru/embed/(?P<id>[\da-f]{32})'
_TEST = { _TEST = {
'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291', 'url': 'http://embed.life.ru/embed/e50c2dec2867350528e2574c899b8291',

View File

@ -123,7 +123,7 @@ class LimelightBaseIE(InfoExtractor):
class LimelightMediaIE(LimelightBaseIE): class LimelightMediaIE(LimelightBaseIE):
IE_NAME = 'limelight' IE_NAME = 'limelight'
_VALID_URL = r'(?:limelight:media:|http://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})' _VALID_URL = r'(?:limelight:media:|https?://link\.videoplatform\.limelight\.com/media/\??\bmediaId=)(?P<id>[a-z0-9]{32})'
_TESTS = [{ _TESTS = [{
'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86', 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
'info_dict': { 'info_dict': {
@ -176,7 +176,7 @@ class LimelightMediaIE(LimelightBaseIE):
class LimelightChannelIE(LimelightBaseIE): class LimelightChannelIE(LimelightBaseIE):
IE_NAME = 'limelight:channel' IE_NAME = 'limelight:channel'
_VALID_URL = r'(?:limelight:channel:|http://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})' _VALID_URL = r'(?:limelight:channel:|https?://link\.videoplatform\.limelight\.com/media/\??\bchannelId=)(?P<id>[a-z0-9]{32})'
_TEST = { _TEST = {
'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082', 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
'info_dict': { 'info_dict': {
@ -207,7 +207,7 @@ class LimelightChannelIE(LimelightBaseIE):
class LimelightChannelListIE(LimelightBaseIE): class LimelightChannelListIE(LimelightBaseIE):
IE_NAME = 'limelight:channel_list' IE_NAME = 'limelight:channel_list'
_VALID_URL = r'(?:limelight:channel_list:|http://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})' _VALID_URL = r'(?:limelight:channel_list:|https?://link\.videoplatform\.limelight\.com/media/\?.*?\bchannelListId=)(?P<id>[a-z0-9]{32})'
_TEST = { _TEST = {
'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b', 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
'info_dict': { 'info_dict': {

View File

@@ -8,7 +8,7 @@ from .common import InfoExtractor
 class M6IE(InfoExtractor):
     IE_NAME = 'm6'
-    _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
+    _VALID_URL = r'https?://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html'
     _TEST = {
         'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html',

View File

@@ -13,7 +13,7 @@ from ..utils import (
 class MailRuIE(InfoExtractor):
     IE_NAME = 'mailru'
     IE_DESC = 'Видео@Mail.Ru'
-    _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
+    _VALID_URL = r'https?://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)'
     _TESTS = [
         {

View File

@@ -17,7 +17,7 @@ from ..utils import (
 class MetacafeIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
+    _VALID_URL = r'https?://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
     _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
     _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
     IE_NAME = 'metacafe'

View File

@@ -91,7 +91,7 @@ class MITIE(TechTVMITIE):
 class OCWMITIE(InfoExtractor):
     IE_NAME = 'ocw.mit.edu'
-    _VALID_URL = r'^http://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+    _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
     _BASE_URL = 'http://ocw.mit.edu/'
     _TESTS = [

View File

@@ -14,7 +14,7 @@ from ..utils import (
 class MiTeleIE(InfoExtractor):
     IE_DESC = 'mitele.es'
-    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
+    _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/'
     _TESTS = [{
         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',

View File

@@ -0,0 +1,81 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_duration,
+    parse_iso8601,
+)
+
+
+class MnetIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?mnet\.(?:com|interest\.me)/tv/vod/(?:.*?\bclip_id=)?(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://www.mnet.com/tv/vod/171008',
+        'info_dict': {
+            'id': '171008',
+            'title': 'SS_이해인@히든박스',
+            'description': 'md5:b9efa592c3918b615ba69fe9f8a05c55',
+            'duration': 88,
+            'upload_date': '20151231',
+            'timestamp': 1451564040,
+            'age_limit': 0,
+            'thumbnails': 'mincount:5',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'ext': 'flv',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://mnet.interest.me/tv/vod/172790',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.mnet.com/tv/vod/vod_view.asp?clip_id=172790&tabMenu=',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        info = self._download_json(
+            'http://content.api.mnet.com/player/vodConfig?id=%s&ctype=CLIP' % video_id,
+            video_id, 'Downloading vod config JSON')['data']['info']
+
+        title = info['title']
+
+        rtmp_info = self._download_json(
+            info['cdn'], video_id, 'Downloading vod cdn JSON')
+
+        formats = [{
+            'url': rtmp_info['serverurl'] + rtmp_info['fileurl'],
+            'ext': 'flv',
+            'page_url': url,
+            'player_url': 'http://flvfile.mnet.com/service/player/201602/cjem_player_tv.swf?v=201602191318',
+        }]
+
+        description = info.get('ment')
+        duration = parse_duration(info.get('time'))
+        timestamp = parse_iso8601(info.get('date'), delimiter=' ')
+        age_limit = info.get('adult')
+        if age_limit is not None:
+            age_limit = 0 if age_limit == 'N' else 18
+        thumbnails = [{
+            'id': thumb_format,
+            'url': thumb['url'],
+            'width': int_or_none(thumb.get('width')),
+            'height': int_or_none(thumb.get('height')),
+        } for thumb_format, thumb in info.get('cover', {}).items() if thumb.get('url')]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'age_limit': age_limit,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
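Aside: the new extractor emits a single RTMP format whose 'url' is the concatenation of the CDN's serverurl and fileurl, with page_url and player_url carried along for the RTMP handshake. A minimal usage sketch (not from the commit; assumes a build with this change is installed and the test URL above is still live, metadata only, no download):

# Minimal sketch: run the new extractor through the normal YoutubeDL pipeline.
from youtube_dl import YoutubeDL

with YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info('http://www.mnet.com/tv/vod/171008', download=False)
    print(info['id'], info['title'], info.get('duration'))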

View File

@@ -13,7 +13,7 @@ from ..utils import (
 class MooshareIE(InfoExtractor):
     IE_NAME = 'mooshare'
     IE_DESC = 'Mooshare.biz'
-    _VALID_URL = r'http://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
+    _VALID_URL = r'https?://(?:www\.)?mooshare\.biz/(?P<id>[\da-z]{12})'
     _TESTS = [
         {

View File

@@ -12,7 +12,7 @@ from ..utils import (
 class MotherlessIE(InfoExtractor):
-    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
     _TESTS = [{
         'url': 'http://motherless.com/AC3FFE1',
         'md5': '310f62e325a9fafe64f68c0bccb6e75f',
@@ -69,6 +69,9 @@ class MotherlessIE(InfoExtractor):
                 ">The page you're looking for cannot be found.<")):
             raise ExtractorError('Video %s does not exist' % video_id, expected=True)
 
+        if '>The content you are trying to view is for friends only.' in webpage:
+            raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
+
         title = self._html_search_regex(
             r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
         video_url = self._html_search_regex(
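The added guard is a plain substring test on the fetched page, raising the extractor's standard error type with expected=True so the failure is reported cleanly instead of as a crash. A standalone sketch of the behaviour (the HTML snippet is a made-up stand-in, not a captured Motherless page):

# Sketch of the new friends-only guard with a hypothetical page snippet.
from youtube_dl.utils import ExtractorError

webpage = '<div>The content you are trying to view is for friends only.</div>'
video_id = 'AC3FFE1'
try:
    if '>The content you are trying to view is for friends only.' in webpage:
        raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
except ExtractorError as error:
    print(error)  # reported as an expected error, without a traceback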

View File

@@ -9,7 +9,7 @@ from ..compat import (
 class MotorsportIE(InfoExtractor):
     IE_DESC = 'motorsport.com'
-    _VALID_URL = r'http://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
+    _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])'
     _TEST = {
         'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/',
         'info_dict': {

Some files were not shown because too many files have changed in this diff