diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 423a08e4d..71a500f04 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.01.30.1*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.01.30.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.03.01*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.03.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.01.30.1 +[debug] youtube-dl version 2019.03.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 4872cd9fc..018a30641 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,72 @@ +version 2019.03.01 + +Core ++ [downloader/external] Add support for rate limit and retries for wget +* [downloader/external] Fix infinite retries for curl (#19303) + +Extractors +* [npo] Fix extraction (#20084) +* [francetv:site] Extend video id regex (#20029, #20071) ++ [periscope] Extract width and height (#20015) +* [servus] Fix extraction (#19297) +* [bbccouk] Make subtitles non fatal (#19651) +* [metacafe] Fix family filter bypass (#19287) + + +version 2019.02.18 + +Extractors +* [tvp:website] Fix and improve extraction ++ [tvp] Detect unavailable videos +* [tvp] Fix description extraction and make thumbnail optional ++ [linuxacademy] Add support for linuxacademy.com (#12207) +* [bilibili] Update keys (#19233) +* [udemy] Extend URL regular expressions (#14330, #15883) +* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) +* [noovo] Fix extraction (#19230) +* [rai] Relax URL regular expression (#19232) ++ [vshare] Pass Referer to download request (#19205, #19221) ++ [openload] Add support for oload.live (#19222) +* [imgur] Use video id as title fallback (#18590) ++ [twitch] Add new source format detection approach (#19193) +* [tvplayhome] Fix video id extraction (#19190) +* [tvplayhome] Fix episode metadata extraction (#19190) +* [rutube:embed] Fix extraction (#19163) ++ [rutube:embed] Add support private videos (#19163) ++ [soundcloud] Extract more metadata ++ [trunews] Add support for trunews.com (#19153) ++ [linkedin:learning] Extract chapter_number and chapter_id (#19162) + + +version 2019.02.08 + +Core +* [utils] Improve JSON-LD regular expression (#18058) +* [YoutubeDL] Fallback to ie_key of matching extractor while making + download archive id when no explicit ie_key is provided (#19022) + +Extractors ++ [malltv] Add support for mall.tv (#18058, #17856) ++ [spankbang:playlist] Add support for playlists (#19145) +* [spankbang] Extend URL regular expression +* [trutv] Fix extraction (#17336) +* [toutv] Fix authentication (#16398, #18700) +* [pornhub] Fix tags and categories extraction (#13720, #19135) +* [pornhd] Fix formats extraction ++ [pornhd] Extract like count (#19123, #19125) +* [radiocanada] Switch to the new media requests (#19115) ++ [teachable] Add support for courses.workitdaily.com (#18871) +- [vporn] Remove extractor (#16276) ++ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086) ++ [drtuber] Extract duration (#19078) +* [soundcloud] Fix paged playlists extraction, add support for albums and update client id +* [soundcloud] Update client id +* [drtv] Improve preference (#19079) ++ [openload] Add support for openload.pw and oload.pw (#18930) ++ [openload] Add support for oload.info (#19073) +* [crackle] Authorize media detail request (#16931) + + version 2019.01.30.1 Core diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2918520c3..d8a8d7ede 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -458,6 +458,7 @@ - **LineTV** - **linkedin:learning** - **linkedin:learning:course** + - **LinuxAcademy** - **LiTV** - **LiveLeak** - **LiveLeakEmbed** @@ -476,6 +477,7 @@ - **mailru:music**: Музыка@Mail.Ru - **mailru:music:search**: Музыка@Mail.Ru - **MakerTV** + - **MallTV** - **mangomolo:live** - **mangomolo:video** - **ManyVids** @@ -827,6 +829,7 @@ - **southpark.nl** - **southparkstudios.dk** - **SpankBang** + - **SpankBangPlaylist** - **Spankwire** - **Spiegel** - **Spiegel:Article**: Articles on spiegel.de @@ -913,6 +916,7 @@ - **ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** + - **TruNews** - **TruTV** - **Tube8** - **TubiTv** @@ -1057,7 +1061,6 @@ - **Voot** - **VoxMedia** - **VoxMediaVolume** - - **Vporn** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 75fa0bbb7..f0aa8466b 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -61,6 +61,7 @@ class TestInfoExtractor(unittest.TestCase): + ''' self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') @@ -69,6 +70,7 @@ class TestInfoExtractor(unittest.TestCase): self.assertEqual(ie._og_search_property('foobar', html), 'Foo') self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') + self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph') self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c168415ce..bc9fc270c 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -82,6 +82,7 @@ from .utils import ( sanitize_url, sanitized_Request, std_headers, + str_or_none, subtitles_filename, UnavailableVideoError, url_basename, @@ -2067,9 +2068,12 @@ class YoutubeDL(object): # and backwards compatibility with prior versions extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist if extractor is None: + url = str_or_none(info_dict.get('url')) + if not url: + return # Try to find matching extractor for the URL and take its ie_key for ie in self._ies: - if ie.suitable(info_dict['url']): + if ie.suitable(url): extractor = ie.ie_key() break else: diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 958d00aac..22e6093b3 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -121,7 +121,11 @@ class CurlFD(ExternalFD): cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') cmd += self._option('--limit-rate', 'ratelimit') - cmd += self._option('--retry', 'retries') + retry = self._option('--retry', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '2147483647' + cmd += retry cmd += self._option('--max-filesize', 'max_filesize') cmd += self._option('--interface', 'source_address') cmd += self._option('--proxy', 'proxy') @@ -160,6 +164,12 @@ class WgetFD(ExternalFD): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--limit-rate', 'ratelimit') + retry = self._option('--tries', 'retries') + if len(retry) == 2: + if retry[1] in ('inf', 'infinite'): + retry[1] = '0' + cmd += retry cmd += self._option('--bind-address', 'source_address') cmd += self._option('--proxy', 'proxy') cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index eac9a5a46..13340ec64 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1,8 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import itertools +import re +import xml from .common import InfoExtractor from ..utils import ( @@ -17,6 +18,7 @@ from ..utils import ( parse_iso8601, try_get, unescapeHTML, + url_or_none, urlencode_postdata, urljoin, ) @@ -310,7 +312,13 @@ class BBCCoUkIE(InfoExtractor): def _get_subtitles(self, media, programme_id): subtitles = {} for connection in self._extract_connections(media): - captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') + cc_url = url_or_none(connection.get('href')) + if not cc_url: + continue + captions = self._download_xml( + cc_url, programme_id, 'Downloading captions', fatal=False) + if not isinstance(captions, xml.etree.ElementTree.Element): + continue lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') subtitles[lang] = [ { diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 4d6b051fe..3746671d3 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -93,8 +93,8 @@ class BiliBiliIE(InfoExtractor): }] }] - _APP_KEY = '84956560bc028eb7' - _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e' + _APP_KEY = 'iVGUTjsxvpLeuDCf' + _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt' def _report_error(self, result): if 'message' in result: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c4ea2882f..c3b0586a0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1058,7 +1058,7 @@ class InfoExtractor(object): @staticmethod def _og_regexes(prop): content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))' - property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)' + property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)' % {'prop': re.escape(prop)}) template = r']+?%s[^>]+?%s' return [ diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 5e2cbe41d..fd1e7afad 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -56,22 +56,11 @@ class CrunchyrollBaseIE(InfoExtractor): if username is None: return - self._download_webpage( - 'https://www.crunchyroll.com/?a=formhandler', - None, 'Logging in', 'Wrong login info', - data=urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'next_url': 'https://www.crunchyroll.com/acct/membership', - 'name': username, - 'password': password, - })) - - ''' login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') def is_logged(webpage): - return '
]+\bclass=["\']notAvailable__text["\'][^>]*>(.+?)
', + webpage, 'error', default=None) or clean_html( + get_element_by_attribute('class', 'msg error', webpage)) + if error: raise ExtractorError('%s said: %s' % ( - self.IE_NAME, clean_html(error_massage)), expected=True) + self.IE_NAME, clean_html(error)), expected=True) title = self._search_regex( r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P