diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index bfae97ddd..36559dd7b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.11.08.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.11.08.1** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.12.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.12.01** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.11.08.1 +[debug] youtube-dl version 2016.12.01 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.gitignore b/.gitignore index 354505d66..9ce4b5e2d 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,9 @@ updates_key.pem *.mp3 *.3gp *.wav +*.ape +*.mkv +*.swf *.part *.swp test/testdata diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0b5a5c1f8..495955bb5 100644 --- a/CONTRIBUTING.md +++ 
b/CONTRIBUTING.md @@ -92,7 +92,7 @@ If you want to create a build of youtube-dl yourself, you'll need ### Adding support for a new site -If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. +If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): diff --git a/ChangeLog b/ChangeLog index d97156e20..bf5f26943 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,72 @@ version Extractors ++ [thisoldhouse] Recognize /tv-episode/ URLs (#11271) + +version 2016.12.01 + +Extractors +* [soundcloud] Update client id (#11327) +* [ruutu] Detect DRM protected videos ++ [liveleak] Add support for youtube embeds (#10688) +* [spike] Fix full episodes support (#11312) +* [comedycentral] Fix full episodes support +* [normalboots] Rewrite in terms of JWPlatform (#11184) +* [teamfourstar] Rewrite in terms of JWPlatform (#11184) +- [screenwavemedia] Remove extractor (#11184) + + +version 2016.11.27 + +Extractors ++ [webcaster] Add support for webcaster.pro ++ [azubu] Add support for azubu.uol.com.br (#11305) +* [viki] Prefer hls formats +* [viki] Fix rtmp formats extraction (#11255) +* [puls4] Relax URL regular expression (#11267) +* [vevo] Improve artist extraction (#10911) +* [mitele] Relax URL regular expression and extract more metadata (#11244) ++ [cbslocal] Recognize New York site 
(#11285) ++ [youtube:playlist] Pass disable_polymer in URL query (#11193) + + +version 2016.11.22 + +Extractors +* [hellporno] Fix video extension extraction (#11247) ++ [hellporno] Add support for hellporno.net (#11247) ++ [amcnetworks] Recognize more BBC America URLs (#11263) +* [funnyordie] Improve extraction (#11208) +* [extractor/generic] Improve limelight embeds support +- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028) +* [bandcamp] Fix free downloads extraction and extract all formats (#11067) +* [twitter:card] Relax URL regular expression (#11225) ++ [tvanouvelles] Add support for tvanouvelles.ca (#10616) + + +version 2016.11.18 + +Extractors +* [youtube:live] Relax URL regular expression (#11164) +* [openload] Fix extraction (#10408, #11122) +* [vlive] Prefer locale over language for subtitles id (#11203) + + +version 2016.11.14.1 + +Core ++ [downloader/fragment,f4m,hls] Respect HTTP headers from info dict +* [extractor/common] Fix media templates with Bandwidth substitution pattern in + MPD manifests (#11175) +* [extractor/common] Improve thumbnail extraction from JSON-LD + +Extractors ++ [nrk] Workaround geo restriction ++ [nrk] Improve error detection and messages ++ [afreecatv] Add support for vod.afreecatv.com (#11174) +* [cda] Fix and improve extraction (#10929, #10936) +* [plays] Fix extraction (#11165) +* [eagleplatform] Fix extraction (#11160) + [audioboom] Recognize /posts/ URLs (#11149) diff --git a/Makefile b/Makefile index b7cec1666..9d1ddc9d1 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm 
*.3gp *.wav *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete diff --git a/README.md b/README.md index 98e374420..840932298 100644 --- a/README.md +++ b/README.md @@ -664,7 +664,7 @@ $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' # Download best format available but not better that 480p $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]' -# Download best video only format but no bigger that 50 MB +# Download best video only format but no bigger than 50 MB $ youtube-dl -f 'best[filesize<50M]' # Download best format available via direct link over HTTP/HTTPS protocol @@ -930,7 +930,7 @@ If you want to create a build of youtube-dl yourself, you'll need ### Adding support for a new site -If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. +If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. 
After you have ensured this site is distributing it's content legally, you can follow this quick list (assuming your service is called `yourextractor`): diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py index ce68f26f9..3d1391334 100755 --- a/devscripts/bash-completion.py +++ b/devscripts/bash-completion.py @@ -25,5 +25,6 @@ def build_completion(opt_parser): filled_template = template.replace("{{flags}}", " ".join(opts_flag)) f.write(filled_template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py index 3b8021e74..30716ad8e 100644 --- a/devscripts/create-github-release.py +++ b/devscripts/create-github-release.py @@ -2,11 +2,13 @@ from __future__ import unicode_literals import base64 +import io import json import mimetypes import netrc import optparse import os +import re import sys sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -90,16 +92,23 @@ class GitHubReleaser(object): def main(): - parser = optparse.OptionParser(usage='%prog VERSION BUILDPATH') + parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH') options, args = parser.parse_args() - if len(args) != 2: + if len(args) != 3: parser.error('Expected a version and a build directory') - version, build_path = args + changelog_file, version, build_path = args + + with io.open(changelog_file, encoding='utf-8') as inf: + changelog = inf.read() + + mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog) + body = mobj.group(1) if mobj else '' releaser = GitHubReleaser() - new_release = releaser.create_release(version, name='youtube-dl %s' % version) + new_release = releaser.create_release( + version, name='youtube-dl %s' % version, body=body) release_id = new_release['id'] for asset in os.listdir(build_path): diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py index 41629d87d..51d19dd33 100755 --- 
a/devscripts/fish-completion.py +++ b/devscripts/fish-completion.py @@ -44,5 +44,6 @@ def build_completion(opt_parser): with open(FISH_COMPLETION_FILE, 'w') as f: f.write(filled_template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py index 2e389fc8e..e3df42cc2 100644 --- a/devscripts/generate_aes_testdata.py +++ b/devscripts/generate_aes_testdata.py @@ -23,6 +23,7 @@ def openssl_encode(algo, key, iv): out, _ = prog.communicate(secret_msg) return out + iv = key = [0x20, 0x15] + 14 * [0] r = openssl_encode('aes-128-cbc', key, iv) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py index 503c1372f..531c93c70 100755 --- a/devscripts/gh-pages/update-sites.py +++ b/devscripts/gh-pages/update-sites.py @@ -32,5 +32,6 @@ def main(): with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: sitesf.write(template) + if __name__ == '__main__': main() diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py index 5e454a429..226d1a5d6 100755 --- a/devscripts/make_contributing.py +++ b/devscripts/make_contributing.py @@ -28,5 +28,6 @@ def main(): with io.open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) + if __name__ == '__main__': main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 9a79c2bc5..19114d30d 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -59,6 +59,7 @@ def build_lazy_ie(ie, name): s += make_valid_template.format(valid_url=ie._make_valid_url()) return s + # find the correct sorting and add the required base classes so that sublcasses # can be correctly created classes = _ALL_CLASSES[:-1] diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py index 8cb4a4638..764795bc5 100644 --- a/devscripts/make_supportedsites.py +++ b/devscripts/make_supportedsites.py @@ -41,5 +41,6 @@ def 
main(): with io.open(outfile, 'w', encoding='utf-8') as outf: outf.write(out) + if __name__ == '__main__': main() diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index ce548739f..f9fe63f1f 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -74,5 +74,6 @@ def filter_options(readme): return ret + if __name__ == '__main__': main() diff --git a/devscripts/release.sh b/devscripts/release.sh index 1af61aa0b..4db5def5d 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -110,7 +110,7 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done ROOT=$(pwd) -python devscripts/create-github-release.py $version "$ROOT/build/$version" +python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version" ssh ytdl@yt-dl.org "sh html/update_latest.sh $version" diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py index 04728e8e2..60aaf76cc 100755 --- a/devscripts/zsh-completion.py +++ b/devscripts/zsh-completion.py @@ -44,5 +44,6 @@ def build_completion(opt_parser): with open(ZSH_COMPLETION_FILE, "w") as f: f.write(template) + parser = youtube_dl.parseOpts()[0] build_completion(parser) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 77832504a..edb76d9cc 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -158,6 +158,7 @@ - **CollegeRama** - **ComCarCoff** - **ComedyCentral** + - **ComedyCentralFullEpisodes** - **ComedyCentralShortname** - **ComedyCentralTV** - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED @@ -643,7 +644,6 @@ - **Screencast** - **ScreencastOMatic** - **ScreenJunkies** - - **ScreenwaveMedia** - **Seeker** - 
**SenateISVP** - **SendtoNews** @@ -715,7 +715,7 @@ - **teachertube:user:collection**: teachertube.com user and collection videos - **TeachingChannel** - **Teamcoco** - - **TeamFour** + - **TeamFourStar** - **TechTalks** - **techtv.mit.edu** - **ted** @@ -771,6 +771,8 @@ - **TV2Article** - **TV3** - **TV4**: tv4.se and tv4play.se + - **TVANouvelles** + - **TVANouvellesArticle** - **TVC** - **TVCArticle** - **tvigle**: Интернет-телевидение Tvigle.ru @@ -880,6 +882,8 @@ - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile** + - **Webcaster** + - **WebcasterFeed** - **WebOfStories** - **WebOfStoriesPlaylist** - **WeiqiTV**: WQTV diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index a98305c74..437c7270e 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -84,5 +84,6 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(ExtractorError, self.ie._download_json, uri, None) self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_aes.py b/test/test_aes.py index 315a3f5ae..54078a66d 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -51,5 +51,6 @@ class TestAES(unittest.TestCase): decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) + if __name__ == '__main__': unittest.main() diff --git a/test/test_download.py b/test/test_download.py index a3f1c0644..463952989 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -60,6 +60,7 @@ def _file_md5(fn): with open(fn, 'rb') as f: return hashlib.md5(f.read()).hexdigest() + defs = gettestcases() @@ -217,6 +218,7 @@ def generator(test_case): return test_template + # And add them to TestDownload for n, test_case in enumerate(defs): test_method = generator(test_case) diff --git a/test/test_execution.py b/test/test_execution.py index 620db080e..11661bb68 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ 
-39,5 +39,6 @@ class TestExecution(unittest.TestCase): _, stderr = p.communicate() self.assertFalse(stderr) + if __name__ == '__main__': unittest.main() diff --git a/test/test_http.py b/test/test_http.py index bb0a098e4..7a7a3510f 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -169,5 +169,6 @@ class TestProxy(unittest.TestCase): # b'xn--fiq228c' is '中文'.encode('idna') self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') + if __name__ == '__main__': unittest.main() diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py index 9d95cb606..789059dbe 100644 --- a/test/test_iqiyi_sdk_interpreter.py +++ b/test/test_iqiyi_sdk_interpreter.py @@ -43,5 +43,6 @@ class TestIqiyiSDKInterpreter(unittest.TestCase): ie._login() self.assertTrue('unable to log in:' in logger.messages[0]) + if __name__ == '__main__': unittest.main() diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 63c350b8f..c24b8ca74 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -104,6 +104,14 @@ class TestJSInterpreter(unittest.TestCase): }''') self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) + def test_call(self): + jsi = JSInterpreter(''' + function x() { return 2; } + function y(a) { return x() + a; } + function z() { return y(3); } + ''') + self.assertEqual(jsi.call_function('z'), 5) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index cb75ca53e..2e3cd0179 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1075,5 +1075,6 @@ The first line self.assertEqual(get_element_by_class('foo', html), 'nice') self.assertEqual(get_element_by_class('no-such-class', html), None) + if __name__ == '__main__': unittest.main() diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py index 96a66f7a0..c1465fe8c 100644 --- a/test/test_verbose_output.py +++ b/test/test_verbose_output.py @@ -66,5 +66,6 @@ class TestVerboseOutput(unittest.TestCase): 
self.assertTrue(b'-p' in serr) self.assertTrue(b'secret' not in serr) + if __name__ == '__main__': unittest.main() diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py index 8de08f2d6..41abdfe3b 100644 --- a/test/test_write_annotations.py +++ b/test/test_write_annotations.py @@ -24,6 +24,7 @@ class YoutubeDL(youtube_dl.YoutubeDL): super(YoutubeDL, self).__init__(*args, **kwargs) self.to_stderr = self.to_screen + params = get_params({ 'writeannotations': True, 'skip_download': True, @@ -74,5 +75,6 @@ class TestAnnotations(unittest.TestCase): def tearDown(self): try_rm(ANNOTATIONS_FILE) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index af1c45421..7a33dbf88 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -66,5 +66,6 @@ class TestYoutubeLists(unittest.TestCase): for entry in result['entries']: self.assertTrue(entry.get('title')) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 060864434..f0c370eee 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -114,6 +114,7 @@ def make_tfunc(url, stype, sig_input, expected_sig): test_func.__name__ = str('test_signature_' + stype + '_' + test_id) setattr(TestSignature, test_func.__name__, test_func) + for test_spec in _TESTS: make_tfunc(*test_spec) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 643393558..6850d95e1 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -95,8 +95,7 @@ def _real_main(argv=None): write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n') except IOError: sys.exit('ERROR: batch file could not be read') - all_urls = batch_urls + args - all_urls = [url.strip() for url in all_urls] + all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already stripped in read_batch_urls _enc = preferredencoding() all_urls = 
[url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls] @@ -450,4 +449,5 @@ def main(argv=None): except KeyboardInterrupt: sys.exit('\nERROR: Interrupted by user') + __all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors'] diff --git a/youtube_dl/aes.py b/youtube_dl/aes.py index a01c367de..b8ff45481 100644 --- a/youtube_dl/aes.py +++ b/youtube_dl/aes.py @@ -174,6 +174,7 @@ def aes_decrypt_text(data, password, key_size_bytes): return plaintext + RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, @@ -328,4 +329,5 @@ def inc(data): break return data + __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index b8aaf5a46..83ee7e257 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2491,6 +2491,7 @@ class _TreeBuilder(etree.TreeBuilder): def doctype(self, name, pubid, system): pass + if sys.version_info[0] >= 3: def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) @@ -2787,6 +2788,7 @@ def workaround_optparse_bug9161(): return real_add_option(self, *bargs, **bkwargs) optparse.OptionGroup.add_option = _compat_add_option + if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3 compat_get_terminal_size = shutil.get_terminal_size else: diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 0aeae3b8f..5d3e5d8d3 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -293,6 +293,7 @@ class FFmpegFD(ExternalFD): class AVconvFD(FFmpegFD): pass + _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() diff --git a/youtube_dl/downloader/f4m.py 
b/youtube_dl/downloader/f4m.py index 80c21d40b..688e086eb 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -314,7 +314,8 @@ class F4mFD(FragmentFD): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) - urlh = self.ydl.urlopen(man_url) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.geturl() # Some manifests may be malformed, e.g. prosiebensat1 generated manifests # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 @@ -387,7 +388,10 @@ class F4mFD(FragmentFD): url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) frag_filename = '%s-%s' % (ctx['tmpfilename'], name) try: - success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()}) + success = ctx['dl'].download(frag_filename, { + 'url': url_parsed.geturl(), + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 84aacf7db..60df627a6 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -9,6 +9,7 @@ from ..utils import ( error_to_compat_str, encodeFilename, sanitize_open, + sanitized_Request, ) @@ -37,6 +38,10 @@ class FragmentFD(FileDownloader): def report_skip_fragment(self, fragment_name): self.to_screen('[download] Skipping fragment %s...' 
% fragment_name) + def _prepare_url(self, info_dict, url): + headers = info_dict.get('http_headers') + return sanitized_Request(url, None, headers) if headers else url + def _prepare_and_start_frag_download(self, ctx): self._prepare_frag_download(ctx) self._start_frag_download(ctx) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 541b92ee1..7373ec05f 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -59,7 +59,8 @@ class HlsFD(FragmentFD): def real_download(self, filename, info_dict): man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) - manifest = self.ydl.urlopen(man_url).read() + + manifest = self.ydl.urlopen(self._prepare_url(info_dict, man_url)).read() s = manifest.decode('utf-8', 'ignore') @@ -112,7 +113,10 @@ class HlsFD(FragmentFD): count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(frag_filename, {'url': frag_url}) + success = ctx['dl'].download(frag_filename, { + 'url': frag_url, + 'http_headers': info_dict.get('http_headers'), + }) if not success: return False down, frag_sanitized = sanitize_open(frag_filename, 'rb') diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 6adb6d824..c5e079a40 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -26,7 +26,7 @@ class AENetworksIE(AENetworksBaseIE): _VALID_URL = r'https?://(?:www\.)?(?P(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P[^/]+(?:/[^/]+){0,2})|movies/(?P[^/]+)/full-movie)' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', - 'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', + 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6', 'info_dict': { 'id': '22253814', 'ext': 'mp4', @@ -99,7 +99,7 @@ class AENetworksIE(AENetworksBaseIE): query = { 'mbr': 'true', - 'assetTypes': 'medium_video_s3' + 'assetTypes': 'high_video_s3' } video_id = self._html_search_meta('aetn:VideoID', webpage) 
media_url = self._search_regex( @@ -155,7 +155,7 @@ class HistoryTopicIE(AENetworksBaseIE): 'id': 'world-war-i-history', 'title': 'World War I History', }, - 'playlist_mincount': 24, + 'playlist_mincount': 23, }, { 'url': 'http://www.history.com/topics/world-war-i-history/videos', 'only_matching': True, @@ -193,7 +193,8 @@ class HistoryTopicIE(AENetworksBaseIE): return self.theplatform_url_result( release_url, video_id, { 'mbr': 'true', - 'switch': 'hls' + 'switch': 'hls', + 'assetTypes': 'high_video_ak', }) else: webpage = self._download_webpage(url, topic_id) @@ -203,6 +204,7 @@ class HistoryTopicIE(AENetworksBaseIE): entries.append(self.theplatform_url_result( video_attributes['data-release-url'], video_attributes['data-id'], { 'mbr': 'true', - 'switch': 'hls' + 'switch': 'hls', + 'assetTypes': 'high_video_ak', })) return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage)) diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py index 518c61f67..75b366993 100644 --- a/youtube_dl/extractor/afreecatv.py +++ b/youtube_dl/extractor/afreecatv.py @@ -11,6 +11,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + update_url_query, xpath_element, xpath_text, ) @@ -18,12 +19,18 @@ from ..utils import ( class AfreecaTVIE(InfoExtractor): IE_DESC = 'afreecatv.com' - _VALID_URL = r'''(?x)^ - https?://(?:(live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? - (?: - /app/(?:index|read_ucc_bbs)\.cgi| - /player/[Pp]layer\.(?:swf|html)) - \?.*?\bnTitleNo=(?P\d+)''' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? 
+ (?: + /app/(?:index|read_ucc_bbs)\.cgi| + /player/[Pp]layer\.(?:swf|html) + )\?.*?\bnTitleNo=| + vod\.afreecatv\.com/PLAYER/STATION/ + ) + (?P\d+) + ''' _TESTS = [{ 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', @@ -66,6 +73,9 @@ class AfreecaTVIE(InfoExtractor): }, { 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', 'only_matching': True, + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030', + 'only_matching': True, }] @staticmethod @@ -83,7 +93,9 @@ class AfreecaTVIE(InfoExtractor): info_url = compat_urlparse.urlunparse(parsed_url._replace( netloc='afbbs.afreecatv.com:8080', path='/api/video/get_video_info.php')) - video_xml = self._download_xml(info_url, video_id) + + video_xml = self._download_xml( + update_url_query(info_url, {'nTitleNo': video_id}), video_id) if xpath_element(video_xml, './track/video/file') is None: raise ExtractorError('Specified AfreecaTV video does not exist', diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py index d2b03b177..87c803e94 100644 --- a/youtube_dl/extractor/amcnetworks.py +++ b/youtube_dl/extractor/amcnetworks.py @@ -10,7 +10,7 @@ from ..utils import ( class AMCNetworksIE(ThePlatformIE): - _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?season-\d+/episode-\d+(?:-(?:[^/]+/)?|/))(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|wetv)\.com/(?:movies/|shows/[^/]+/(?:full-episodes/)?[^/]+/episode-\d+(?:-(?:[^/]+/)?|/))(?P[^/?#]+)' _TESTS = [{ 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', 'md5': '', @@ -41,6 +41,9 @@ class AMCNetworksIE(ThePlatformIE): }, { 'url': 'http://www.ifc.com/movies/chaos', 'only_matching': True, + }, { + 'url': 
'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index 72e1bd59d..1eebf5dfd 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -11,7 +11,7 @@ from ..utils import ( class AzubuIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?azubu\.tv/[^/]+#!/play/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/[^/]+#!/play/(?P\d+)' _TESTS = [ { 'url': 'http://www.azubu.tv/GSL#!/play/15575/2014-hot6-cup-last-big-match-ro8-day-1', @@ -103,12 +103,15 @@ class AzubuIE(InfoExtractor): class AzubuLiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?azubu\.(?:tv|uol.com.br)/(?P[^/]+)$' - _TEST = { + _TESTS = [{ 'url': 'http://www.azubu.tv/MarsTVMDLen', 'only_matching': True, - } + }, { + 'url': 'http://azubu.uol.com.br/adolfz', + 'only_matching': True, + }] def _real_extract(self, url): user = self._match_id(url) diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py index 249c3d956..88c590e98 100644 --- a/youtube_dl/extractor/bandcamp.py +++ b/youtube_dl/extractor/bandcamp.py @@ -1,7 +1,9 @@ from __future__ import unicode_literals import json +import random import re +import time from .common import InfoExtractor from ..compat import ( @@ -12,6 +14,9 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + parse_filesize, + unescapeHTML, + update_url_query, ) @@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor): r'(?ms)var TralbumData = .*?[{,]\s*id: (?P\d+),?$', webpage, 'video id') - download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') - # We get the dictionary of the track from some javascript code - all_info = self._parse_json(self._search_regex( - r'(?sm)items: (.*?),$', 
download_webpage, 'items'), video_id) - info = all_info[0] - # We pick mp3-320 for now, until format selection can be easily implemented. - mp3_info = info['downloads']['mp3-320'] - # If we try to use this url it says the link has expired - initial_url = mp3_info['url'] - m_url = re.match( - r'(?Phttp://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P.*?)&id=(?P.*?)&ts=(?P.*)$', - initial_url) - # We build the url we will use to get the final track url - # This url is build in Bandcamp in the script download_bunde_*.js - request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) - final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') - # If we could correctly generate the .rand field the url would be - # in the "download_url" key - final_url = self._proto_relative_url(self._search_regex( - r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:') + download_webpage = self._download_webpage( + download_link, video_id, 'Downloading free downloads page') + + blob = self._parse_json( + self._search_regex( + r'data-blob=(["\'])(?P{.+?})\1', download_webpage, + 'blob', group='blob'), + video_id, transform_source=unescapeHTML) + + info = blob['digital_items'][0] + + downloads = info['downloads'] + track = info['title'] + + artist = info.get('artist') + title = '%s - %s' % (artist, track) if artist else track + + download_formats = {} + for f in blob['download_formats']: + name, ext = f.get('name'), f.get('file_extension') + if all(isinstance(x, compat_str) for x in (name, ext)): + download_formats[name] = ext.strip('.') + + formats = [] + for format_id, f in downloads.items(): + format_url = f.get('url') + if not format_url: + continue + # Stat URL generation algorithm is reverse engineered from + # download_*_bundle_*.js + stat_url = update_url_query( + format_url.replace('/download/', 
'/statdownload/'), { + '.rand': int(time.time() * 1000 * random.random()), + }) + format_id = f.get('encoding_name') or format_id + stat = self._download_json( + stat_url, video_id, 'Downloading %s JSON' % format_id, + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], + fatal=False) + if not stat: + continue + retry_url = stat.get('retry_url') + if not isinstance(retry_url, compat_str): + continue + formats.append({ + 'url': self._proto_relative_url(retry_url, 'http:'), + 'ext': download_formats.get(format_id), + 'format_id': format_id, + 'format_note': f.get('description'), + 'filesize': parse_filesize(f.get('size_mb')), + 'vcodec': 'none', + }) + self._sort_formats(formats) return { 'id': video_id, - 'title': info['title'], - 'ext': 'mp3', - 'vcodec': 'none', - 'url': final_url, + 'title': title, 'thumbnail': info.get('thumb_url'), 'uploader': info.get('artist'), + 'artist': artist, + 'track': track, + 'formats': formats, } diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 2a8cd64b9..c5e11e8eb 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -45,7 +45,8 @@ class BloombergIE(InfoExtractor): name = self._match_id(url) webpage = self._download_webpage(url, name) video_id = self._search_regex( - r'["\']bmmrId["\']\s*:\s*(["\'])(?P.+?)\1', + (r'["\']bmmrId["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + r'videoId\s*:\s*(["\'])(?P(?:(?!\1).)+)\1'), webpage, 'id', group='url', default=None) if not video_id: bplayer_data = self._parse_json(self._search_regex( diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 289709c97..8d5f11dd1 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -4,11 +4,14 @@ from __future__ import unicode_literals from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse -from ..utils import unified_timestamp +from ..utils import ( + parse_iso8601, + 
unified_timestamp, +) class CBSLocalIE(AnvatoIE): - _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P[0-9a-z-]+)' + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P[0-9a-z-]+)' _TESTS = [{ # Anvato backend @@ -49,6 +52,31 @@ class CBSLocalIE(AnvatoIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', + 'info_dict': { + 'id': '3580809', + 'ext': 'mp4', + 'title': 'A Very Blue Anniversary', + 'description': 'CBS2’s Cindy Hsu has more.', + 'thumbnail': 're:^https?://.*', + 'timestamp': 1479962220, + 'upload_date': '20161124', + 'uploader': 'CBS', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\WCBSTV', + 'Syndication\\AOL', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\Yahoo', + 'Content\\News', + 'Content\\News\\Local News', + ], + 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], + }, }] def _real_extract(self, url): @@ -64,8 +92,11 @@ class CBSLocalIE(AnvatoIE): info_dict = self._extract_anvato_videos(webpage, display_id) time_str = self._html_search_regex( - r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) - timestamp = unified_timestamp(time_str) + r'class="entry-date">([^<]+)<', webpage, 'released date', default=None) + if time_str: + timestamp = unified_timestamp(time_str) + else: + timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage)) info_dict.update({ 'display_id': display_id, diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py index 8af318703..e00bdaf66 100755 --- a/youtube_dl/extractor/cda.py +++ b/youtube_dl/extractor/cda.py @@ -5,14 +5,16 @@ import re from .common import InfoExtractor from ..utils import ( - decode_packed_codes, ExtractorError, - parse_duration + float_or_none, + int_or_none, + parse_duration, ) class CDAIE(InfoExtractor): _VALID_URL = 
r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P[0-9a-z]+)' + _BASE_URL = 'http://www.cda.pl/' _TESTS = [{ 'url': 'http://www.cda.pl/video/5749950c', 'md5': '6f844bf51b15f31fae165365707ae970', @@ -21,6 +23,9 @@ class CDAIE(InfoExtractor): 'ext': 'mp4', 'height': 720, 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', + 'description': 'md5:269ccd135d550da90d1662651fcb9772', + 'thumbnail': 're:^https?://.*\.jpg$', + 'average_rating': float, 'duration': 39 } }, { @@ -30,6 +35,11 @@ class CDAIE(InfoExtractor): 'id': '57413289', 'ext': 'mp4', 'title': 'Lądowanie na lotnisku na Maderze', + 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'crash404', + 'view_count': int, + 'average_rating': float, 'duration': 137 } }, { @@ -39,31 +49,55 @@ class CDAIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage('http://ebd.cda.pl/0x0/' + video_id, video_id) + self._set_cookie('cda.pl', 'cda.player', 'html5') + webpage = self._download_webpage( + self._BASE_URL + '/video/' + video_id, video_id) if 'Ten film jest dostępny dla użytkowników premium' in webpage: raise ExtractorError('This video is only available for premium users.', expected=True) - title = self._html_search_regex(r'(.+?)', webpage, 'title') - formats = [] + uploader = self._search_regex(r'''(?x) + <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*> + (?:<\1[^>]*>[^<]*|(?!)(?:.|\n))*? 
+ <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P[^<]+) + ''', webpage, 'uploader', default=None, group='uploader') + view_count = self._search_regex( + r'Odsłony:(?:\s| )*([0-9]+)', webpage, + 'view_count', default=None) + average_rating = self._search_regex( + r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P[0-9.]+)', + webpage, 'rating', fatal=False, group='rating_value') + info_dict = { 'id': video_id, - 'title': title, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'uploader': uploader, + 'view_count': int_or_none(view_count), + 'average_rating': float_or_none(average_rating), + 'thumbnail': self._og_search_thumbnail(webpage), 'formats': formats, 'duration': None, } def extract_format(page, version): - unpacked = decode_packed_codes(page) - format_url = self._search_regex( - r"(?:file|url)\s*:\s*(\\?[\"'])(?Phttp.+?)\1", unpacked, - '%s url' % version, fatal=False, group='url') - if not format_url: + json_str = self._search_regex( + r'player_data=(\\?["\'])(?P.+?)\1', page, + '%s player_json' % version, fatal=False, group='player_data') + if not json_str: + return + player_data = self._parse_json( + json_str, '%s player_data' % version, fatal=False) + if not player_data: + return + video = player_data.get('video') + if not video or 'file' not in video: + self.report_warning('Unable to extract %s version information' % version) return f = { - 'url': format_url, + 'url': video['file'], } m = re.search( r']+data-quality="(?P[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P[0-9]+)p', @@ -75,9 +109,7 @@ class CDAIE(InfoExtractor): }) info_dict['formats'].append(f) if not info_dict['duration']: - info_dict['duration'] = parse_duration(self._search_regex( - r"duration\s*:\s*(\\?[\"'])(?P.+?)\1", - unpacked, 'duration', fatal=False, group='duration')) + info_dict['duration'] = parse_duration(video.get('duration')) extract_format(webpage, 'default') @@ -85,7 +117,8 @@ class 
CDAIE(InfoExtractor): r']+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', webpage): webpage = self._download_webpage( - href, video_id, 'Downloading %s version information' % resolution, fatal=False) + self._BASE_URL + href, video_id, + 'Downloading %s version information' % resolution, fatal=False) if not webpage: # Manually report warning because empty page is returned when # invalid version is requested. diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 88346dde7..0239dfd84 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class ComedyCentralIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ - (video-clips|episodes|cc-studios|video-collections|full-episodes|shows) + (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes))) /(?P.*)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' @@ -27,6 +27,40 @@ class ComedyCentralIE(MTVServicesInfoExtractor): }] +class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ + (?:full-episodes|shows(?=/[^/]+/full-episodes)) + /(?P<id>[^?]+)''' + _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028', + 'info_dict': { + 'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."', + 'title': 'November 28, 2016 - Ryan Speedo Green', + }, + 'playlist_count': 4, + }, { + 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = 
self._download_webpage(url, playlist_id) + + feed_json = self._search_regex(r'var triforceManifestFeed\s*=\s*(\{.+?\});\n', webpage, 'triforce feeed') + feed = self._parse_json(feed_json, playlist_id) + zones = feed['manifest']['zones'] + + video_zone = zones['t2_lc_promo1'] + feed = self._download_json(video_zone['feed'], playlist_id) + mgid = feed['result']['data']['id'] + + videos_info = self._get_videos_info(mgid) + return videos_info + + class ToshIE(MTVServicesInfoExtractor): IE_DESC = 'Tosh.0' _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)' diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5f4c984a9..05c51fac9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -886,7 +886,7 @@ class InfoExtractor(object): 'url': e.get('contentUrl'), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnail': e.get('thumbnailUrl'), + 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'), 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), 'filesize': float_or_none(e.get('contentSize')), @@ -1703,7 +1703,7 @@ class InfoExtractor(object): representation_ms_info['fragments'] = [{ 'url': media_template % { 'Number': segment_number, - 'Bandwidth': representation_attrib.get('bandwidth'), + 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), }, 'duration': segment_duration, } for segment_number in range( @@ -1721,7 +1721,7 @@ class InfoExtractor(object): def add_segment_url(): segment_url = media_template % { 'Time': segment_time, - 'Bandwidth': representation_attrib.get('bandwidth'), + 'Bandwidth': int_or_none(representation_attrib.get('bandwidth')), 'Number': segment_number, } representation_ms_info['fragments'].append({ diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index cc141f68e..8d5b69f68 100644 --- 
a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -236,7 +236,7 @@ class CrunchyrollIE(CrunchyrollBaseIE): output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] - output += """ScaledBorderAndShadow: yes + output += """ScaledBorderAndShadow: no [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 578359a5e..46d007b7d 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -180,6 +180,7 @@ from .cnn import ( from .coub import CoubIE from .collegerama import CollegeRamaIE from .comedycentral import ( + ComedyCentralFullEpisodesIE, ComedyCentralIE, ComedyCentralShortnameIE, ComedyCentralTVIE, @@ -804,7 +805,6 @@ from .scivee import SciVeeIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE from .screenjunkies import ScreenJunkiesIE -from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .seeker import SeekerIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE @@ -897,6 +897,7 @@ from .teachertube import ( ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE +from .teamfourstar import TeamFourStarIE from .techtalks import TechTalksIE from .ted import TEDIE from .tele13 import Tele13IE @@ -965,6 +966,10 @@ from .tv2 import ( ) from .tv3 import TV3IE from .tv4 import TV4IE +from .tvanouvelles import ( + TVANouvellesIE, + TVANouvellesArticleIE, +) from .tvc import ( TVCIE, TVCArticleIE, @@ -1117,6 +1122,10 @@ from .wdr import ( WDRIE, WDRMobileIE, ) +from .webcaster import ( + WebcasterIE, + 
WebcasterFeedIE, +) from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dl/extractor/firsttv.py index 6b662cc3c..47673e2d4 100644 --- a/youtube_dl/extractor/firsttv.py +++ b/youtube_dl/extractor/firsttv.py @@ -2,7 +2,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urlparse +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( int_or_none, qualities, @@ -22,8 +25,7 @@ class FirstTVIE(InfoExtractor): 'info_dict': { 'id': '40049', 'ext': 'mp4', - 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', - 'description': 'md5:36a39c1d19618fec57d12efe212a8370', + 'title': 'Гость Людмила Сенчина. Наедине со всеми. Выпуск от 12.02.2015', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20150212', 'duration': 2694, @@ -34,8 +36,7 @@ class FirstTVIE(InfoExtractor): 'info_dict': { 'id': '364746', 'ext': 'mp4', - 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', - 'description': 'md5:a242eea0031fd180a4497d52640a9572', + 'title': 'Весенняя аллергия. Доброе утро. Фрагмент выпуска от 07.04.2016', 'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', 'upload_date': '20160407', 'duration': 179, @@ -44,6 +45,17 @@ class FirstTVIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + 'url': 'http://www.1tv.ru/news/issue/2016-12-01/14:00', + 'info_dict': { + 'id': '14:00', + 'title': 'Выпуск новостей в 14:00 1 декабря 2016 года. Новости. 
Первый канал', + 'description': 'md5:2e921b948f8c1ff93901da78ebdb1dfd', + }, + 'playlist_count': 13, + }, { + 'url': 'http://www.1tv.ru/shows/tochvtoch-supersezon/vystupleniya/evgeniy-dyatlov-vladimir-vysockiy-koni-priveredlivye-toch-v-toch-supersezon-fragment-vypuska-ot-06-11-2016', + 'only_matching': True, }] def _real_extract(self, url): @@ -51,43 +63,66 @@ class FirstTVIE(InfoExtractor): webpage = self._download_webpage(url, display_id) playlist_url = compat_urlparse.urljoin(url, self._search_regex( - r'data-playlist-url="([^"]+)', webpage, 'playlist url')) + r'data-playlist-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'playlist url', group='url')) - item = self._download_json(playlist_url, display_id)[0] - video_id = item['id'] - quality = qualities(('ld', 'sd', 'hd', )) - formats = [] - for f in item.get('mbr', []): - src = f.get('src') - if not src: - continue - fname = f.get('name') - formats.append({ - 'url': src, - 'format_id': fname, - 'quality': quality(fname), + parsed_url = compat_urlparse.urlparse(playlist_url) + qs = compat_urlparse.parse_qs(parsed_url.query) + item_ids = qs.get('videos_ids[]') or qs.get('news_ids[]') + + items = self._download_json(playlist_url, display_id) + + if item_ids: + items = [ + item for item in items + if item.get('uid') and compat_str(item['uid']) in item_ids] + else: + items = [items[0]] + + entries = [] + QUALITIES = ('ld', 'sd', 'hd', ) + + for item in items: + title = item['title'] + quality = qualities(QUALITIES) + formats = [] + for f in item.get('mbr', []): + src = f.get('src') + if not src or not isinstance(src, compat_str): + continue + tbr = int_or_none(self._search_regex( + r'_(\d{3,})\.mp4', src, 'tbr', default=None)) + formats.append({ + 'url': src, + 'format_id': f.get('name'), + 'tbr': tbr, + 'quality': quality(f.get('name')), + }) + self._sort_formats(formats) + + thumbnail = item.get('poster') or self._og_search_thumbnail(webpage) + duration = int_or_none(item.get('duration') or 
self._html_search_meta( + 'video:duration', webpage, 'video duration', fatal=False)) + upload_date = unified_strdate(self._html_search_meta( + 'ya:ovs:upload_date', webpage, 'upload date', default=None)) + + entries.append({ + 'id': compat_str(item.get('id') or item['uid']), + 'thumbnail': thumbnail, + 'title': title, + 'upload_date': upload_date, + 'duration': int_or_none(duration), + 'formats': formats }) - self._sort_formats(formats) title = self._html_search_regex( (r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', r"'title'\s*:\s*'([^']+)'"), - webpage, 'title', default=None) or item['title'] + webpage, 'title', default=None) or self._og_search_title( + webpage, default=None) description = self._html_search_regex( r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', default=None) or self._html_search_meta( - 'description', webpage, 'description') - duration = int_or_none(self._html_search_meta( - 'video:duration', webpage, 'video duration', fatal=False)) - upload_date = unified_strdate(self._html_search_meta( - 'ya:ovs:upload_date', webpage, 'upload date', fatal=False)) + 'description', webpage, 'description', default=None) - return { - 'id': video_id, - 'thumbnail': item.get('poster') or self._og_search_thumbnail(webpage), - 'title': title, - 'description': description, - 'upload_date': upload_date, - 'duration': int_or_none(duration), - 'formats': formats - } + return self.playlist_result(entries, display_id, title, description) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 8c5ffc9e8..f2928b5fe 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -28,6 +28,9 @@ class FunnyOrDieIE(InfoExtractor): 'description': 'Please use this to sell something. 
www.jonlajoie.com', 'thumbnail': 're:^http:.*\.jpg$', }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'http://www.funnyordie.com/articles/ebf5e34fc8/10-hours-of-walking-in-nyc-as-a-man', 'only_matching': True, @@ -51,19 +54,45 @@ class FunnyOrDieIE(InfoExtractor): formats = [] - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False) + source_formats = list(filter( + lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', + m3u8_formats)) - bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)] + bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)(?=[,/])', m3u8_url)] bitrates.sort() - for bitrate in bitrates: - for link in links: - formats.append({ - 'url': self._proto_relative_url('%s%d.%s' % (link[0], bitrate, link[1])), - 'format_id': '%s-%d' % (link[1], bitrate), - 'vbr': bitrate, - }) + if source_formats: + self._sort_formats(source_formats) + + for bitrate, f in zip(bitrates, source_formats or [{}] * len(bitrates)): + for path, ext in links: + ff = f.copy() + if ff: + if ext != 'mp4': + ff = dict( + [(k, v) for k, v in ff.items() + if k in ('height', 'width', 'format_id')]) + ff.update({ + 'format_id': ff['format_id'].replace('hls', ext), + 'ext': ext, + 'protocol': 'http', + }) + else: + ff.update({ + 'format_id': '%s-%d' % (ext, bitrate), + 'vbr': bitrate, + }) + ff['url'] = self._proto_relative_url( + '%s%d.%s' % (path, bitrate, ext)) + formats.append(ff) + self._check_formats(formats, video_id) + + formats.extend(m3u8_formats) + self._sort_formats( + formats, field_preference=('height', 'width', 'tbr', 'format_id')) subtitles = {} for src, src_lang in re.findall(r'<track kind="captions" src="([^"]+)" srclang="([^"]+)"', webpage): diff --git a/youtube_dl/extractor/fusion.py b/youtube_dl/extractor/fusion.py index 
b4ab4cbb7..ede729b52 100644 --- a/youtube_dl/extractor/fusion.py +++ b/youtube_dl/extractor/fusion.py @@ -29,7 +29,7 @@ class FusionIE(InfoExtractor): webpage = self._download_webpage(url, display_id) ooyala_code = self._search_regex( - r'data-video-id=(["\'])(?P<code>.+?)\1', + r'data-ooyala-id=(["\'])(?P<code>(?:(?!\1).)+)\1', webpage, 'ooyala code', group='code') return OoyalaIE._build_url_result(ooyala_code) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bde65fa27..3949c8bf7 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -56,10 +56,10 @@ from .dailymotion import ( ) from .onionstudios import OnionStudiosIE from .viewlift import ViewLiftEmbedIE -from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE from .videomore import VideomoreIE +from .webcaster import WebcasterFeedIE from .googledrive import GoogleDriveIE from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE @@ -1189,16 +1189,6 @@ class GenericIE(InfoExtractor): 'duration': 248.667, }, }, - # ScreenwaveMedia embed - { - 'url': 'http://www.thecinemasnob.com/the-cinema-snob/a-nightmare-on-elm-street-2-freddys-revenge1', - 'md5': '24ace5baba0d35d55c6810b51f34e9e0', - 'info_dict': { - 'id': 'cinemasnob-55d26273809dd', - 'ext': 'mp4', - 'title': 'cinemasnob', - }, - }, # BrightcoveInPageEmbed embed { 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', @@ -2140,6 +2130,11 @@ class GenericIE(InfoExtractor): if videomore_url: return self.url_result(videomore_url) + # Look for Webcaster embeds + webcaster_url = WebcasterFeedIE._extract_url(self, webpage) + if webcaster_url: + return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key()) + # Look for Playwire embeds mobj = re.search( r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) @@ -2206,11 +2201,6 @@ class 
GenericIE(InfoExtractor): if jwplatform_url: return self.url_result(jwplatform_url, 'JWPlatform') - # Look for ScreenwaveMedia embeds - mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) - if mobj is not None: - return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia') - # Look for Digiteka embeds digiteka_url = DigitekaIE._extract_url(webpage) if digiteka_url: @@ -2232,6 +2222,16 @@ class GenericIE(InfoExtractor): return self.url_result('limelight:%s:%s' % ( lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + mobj = re.search( + r'''(?sx) + <object[^>]+class=(["\'])LimelightEmbeddedPlayerFlash\1[^>]*>.*? + <param[^>]+ + name=(["\'])flashVars\2[^>]+ + value=(["\'])(?:(?!\3).)*mediaId=(?P<id>[a-z0-9]{32}) + ''', webpage) + if mobj: + return self.url_result('limelight:media:%s' % mobj.group('id')) + # Look for AdobeTVVideo embeds mobj = re.search( r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py index 7a1c75b65..10da14067 100644 --- a/youtube_dl/extractor/hellporno.py +++ b/youtube_dl/extractor/hellporno.py @@ -6,12 +6,13 @@ from .common import InfoExtractor from ..utils import ( js_to_json, remove_end, + determine_ext, ) class HellPornoIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hellporno\.com/videos/(?P<id>[^/]+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)' + _TESTS = [{ 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/', 'md5': '1fee339c610d2049699ef2aa699439f1', 'info_dict': { @@ -22,7 +23,10 @@ class HellPornoIE(InfoExtractor): 'thumbnail': 're:https?://.*\.jpg$', 'age_limit': 18, } - } + }, { + 'url': 'http://hellporno.net/v/186271/', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) @@ -38,7 +42,7 @@ class HellPornoIE(InfoExtractor): video_id = 
flashvars.get('video_id') thumbnail = flashvars.get('preview_url') - ext = flashvars.get('postfix', '.mp4')[1:] + ext = determine_ext(flashvars.get('postfix'), 'mp4') formats = [] for video_url_key in ['video_url', 'video_alt_url']: diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dl/extractor/liveleak.py index ea0565ac0..b84e4dd6c 100644 --- a/youtube_dl/extractor/liveleak.py +++ b/youtube_dl/extractor/liveleak.py @@ -54,6 +54,22 @@ class LiveLeakIE(InfoExtractor): 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia', 'thumbnail': 're:^https?://.*\.jpg$' } + }, { + # Covers https://github.com/rg3/youtube-dl/pull/10664#issuecomment-247439521 + 'url': 'http://m.liveleak.com/view?i=763_1473349649', + 'add_ie': ['Youtube'], + 'info_dict': { + 'id': '763_1473349649', + 'ext': 'mp4', + 'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty', + 'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.', + 'uploader': 'Ziz', + 'upload_date': '20160908', + 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw' + }, + 'params': { + 'skip_download': True, + }, }] @staticmethod @@ -87,7 +103,7 @@ class LiveLeakIE(InfoExtractor): else: # Maybe an embed? 
embed_url = self._search_regex( - r'<iframe[^>]+src="(http://www.prochan.com/embed\?[^"]+)"', + r'<iframe[^>]+src="(https?://(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"', webpage, 'embed URL') return { '_type': 'url_transparent', @@ -107,6 +123,7 @@ class LiveLeakIE(InfoExtractor): 'format_note': s.get('label'), 'url': s['file'], } for i, s in enumerate(sources)] + for i, s in enumerate(sources): # Removing '.h264_*.mp4' gives the raw video, which is essentially # the same video without the LiveLeak logo at the top (see diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index c41ab1e91..f577836be 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -75,7 +75,7 @@ class MiTeleBaseIE(InfoExtractor): class MiTeleIE(InfoExtractor): IE_DESC = 'mitele.es' - _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player' _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', @@ -86,7 +86,10 @@ class MiTeleIE(InfoExtractor): 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', 'series': 'Diario de', 'season': 'La redacción', + 'season_number': 14, + 'season_id': 'diario_de_t14_11981', 'episode': 'Programa 144', + 'episode_number': 3, 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 2913, }, @@ -101,7 +104,10 @@ class MiTeleIE(InfoExtractor): 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'series': 'Cuarto Milenio', 'season': 'Temporada 6', + 'season_number': 6, + 'season_id': 'cuarto_milenio_t06_12715', 'episode': 'Programa 226', + 'episode_number': 24, 'thumbnail': 're:(?i)^https?://.*\.jpg$', 'duration': 7313, }, @@ -109,41 +115,77 @@ class MiTeleIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], + }, { + 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', + 'only_matching': True, }] def 
_real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None) - gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script') + gigya_url = self._search_regex( + r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s+src="([^"]*)">[^>]*</script>', + webpage, 'gigya', default=None) + gigya_sc = self._download_webpage( + compat_urlparse.urljoin('http://www.mitele.es/', gigya_url), + video_id, 'Downloading gigya script') + # Get a appKey/uuid for getting the session key - appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable') - appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey') - uid = compat_str(uuid.uuid4()) - session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid) - session_json = self._download_json(session_url, video_id, 'Downloading session keys') - sessionKey = compat_str(session_json['sessionKey']) + appKey_var = self._search_regex( + r'value\s*\(\s*["\']appGridApplicationKey["\']\s*,\s*([0-9a-f]+)', + gigya_sc, 'appKey variable') + appKey = self._search_regex( + r'var\s+%s\s*=\s*["\']([0-9a-f]+)' % appKey_var, gigya_sc, 'appKey') + + session_json = self._download_json( + 'https://appgrid-api.cloud.accedo.tv/session', + video_id, 'Downloading session keys', query={ + 'appKey': appKey, + 'uuid': compat_str(uuid.uuid4()), + }) + + paths = self._download_json( + 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration', + video_id, 'Downloading paths JSON', + query={'sessionKey': compat_str(session_json['sessionKey'])}) - paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey - paths = 
self._download_json(paths_url, video_id, 'Downloading paths JSON') ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] - data_p = ( - 'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] + - '/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full') - data = self._download_json(data_p, video_id, 'Downloading data JSON') - source = data['hits']['hits'][0]['_source'] - embedCode = source['offers'][0]['embed_codes'][0] + source = self._download_json( + 'http://%s%s%s/docs/%s' % ( + ooyala_s['base_url'], ooyala_s['full_path'], + ooyala_s['provider_id'], video_id), + video_id, 'Downloading data JSON', query={ + 'include_titles': 'Series,Season', + 'product_name': 'test', + 'format': 'full', + })['hits']['hits'][0]['_source'] + embedCode = source['offers'][0]['embed_codes'][0] titles = source['localizable_titles'][0] + title = titles.get('title_medium') or titles['title_long'] - episode = titles['title_sort_name'] - description = titles['summary_long'] - titles_series = source['localizable_titles_series'][0] - series = titles_series['title_long'] - titles_season = source['localizable_titles_season'][0] - season = titles_season['title_medium'] - duration = parse_duration(source['videos'][0]['duration']) + + description = titles.get('summary_long') or titles.get('summary_medium') + + def get(key1, key2): + value1 = source.get(key1) + if not value1 or not isinstance(value1, list): + return + if not isinstance(value1[0], dict): + return + return value1[0].get(key2) + + series = get('localizable_titles_series', 'title_medium') + + season = get('localizable_titles_season', 'title_medium') + season_number = int_or_none(source.get('season_number')) + season_id = source.get('season_id') + + episode = titles.get('title_sort_name') + episode_number = int_or_none(source.get('episode_number')) + + duration = parse_duration(get('videos', 'duration')) return { '_type': 'url_transparent', @@ -154,7 
+196,10 @@ class MiTeleIE(InfoExtractor): 'description': description, 'series': series, 'season': season, + 'season_number': season_number, + 'season_id': season_id, 'episode': episode, + 'episode_number': episode_number, 'duration': duration, - 'thumbnail': source['images'][0]['url'], + 'thumbnail': get('images', 'url'), } diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 74a3a035e..03351917e 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -13,6 +13,7 @@ from ..utils import ( fix_xml_ampersands, float_or_none, HEADRequest, + NO_DEFAULT, RegexNotFoundError, sanitized_Request, strip_or_none, @@ -201,7 +202,7 @@ class MTVServicesInfoExtractor(InfoExtractor): [self._get_video_info(item) for item in idoc.findall('.//item')], playlist_title=title, playlist_description=description) - def _extract_mgid(self, webpage): + def _extract_mgid(self, webpage, default=NO_DEFAULT): try: # the url can be http://media.mtvnservices.com/fb/{mgid}.swf # or http://media.mtvnservices.com/{mgid} @@ -221,7 +222,7 @@ class MTVServicesInfoExtractor(InfoExtractor): sm4_embed = self._html_search_meta( 'sm4:video:embed', webpage, 'sm4 embed', default='') mgid = self._search_regex( - r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid') + r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=default) return mgid def _real_extract(self, url): diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dl/extractor/normalboots.py index 6aa0895b8..61fe571df 100644 --- a/youtube_dl/extractor/normalboots.py +++ b/youtube_dl/extractor/normalboots.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .screenwavemedia import ScreenwaveMediaIE +from .jwplatform import JWPlatformIE from ..utils import ( unified_strdate, @@ -25,7 +25,7 @@ class NormalbootsIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'add_ie': ['ScreenwaveMedia'], + 'add_ie': ['JWPlatform'], } def _real_extract(self, url): @@ 
-39,15 +39,13 @@ class NormalbootsIE(InfoExtractor): r'<span style="text-transform:uppercase; font-size:inherit;">[A-Za-z]+, (?P<date>.*)</span>', webpage, 'date', fatal=False)) - screenwavemedia_url = self._html_search_regex( - ScreenwaveMediaIE.EMBED_PATTERN, webpage, 'screenwave URL', - group='url') + jwplatform_url = JWPlatformIE._extract_url(webpage) return { '_type': 'url_transparent', 'id': video_id, - 'url': screenwavemedia_url, - 'ie_key': ScreenwaveMediaIE.ie_key(), + 'url': jwplatform_url, + 'ie_key': JWPlatformIE.ie_key(), 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3700b7ab2..c89aac63e 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import random import re from .common import InfoExtractor @@ -14,6 +15,25 @@ from ..utils import ( class NRKBaseIE(InfoExtractor): + _faked_ip = None + + def _download_webpage_handle(self, *args, **kwargs): + # NRK checks X-Forwarded-For HTTP header in order to figure out the + # origin of the client behind proxy. This allows to bypass geo + # restriction by faking this header's value to some Norway IP. + # We will do so once we encounter any geo restriction error. 
+ if self._faked_ip: + # NB: str is intentional + kwargs.setdefault(str('headers'), {})['X-Forwarded-For'] = self._faked_ip + return super(NRKBaseIE, self)._download_webpage_handle(*args, **kwargs) + + def _fake_ip(self): + # Use fake IP from 37.191.128.0/17 in order to workaround geo + # restriction + def octet(lb=0, ub=255): + return random.randint(lb, ub) + self._faked_ip = '37.191.%d.%d' % (octet(128), octet()) + def _real_extract(self, url): video_id = self._match_id(url) @@ -24,6 +44,8 @@ class NRKBaseIE(InfoExtractor): title = data.get('fullTitle') or data.get('mainTitle') or data['title'] video_id = data.get('id') or video_id + http_headers = {'X-Forwarded-For': self._faked_ip} if self._faked_ip else {} + entries = [] media_assets = data.get('mediaAssets') @@ -54,6 +76,7 @@ class NRKBaseIE(InfoExtractor): 'duration': duration, 'subtitles': subtitles, 'formats': formats, + 'http_headers': http_headers, }) if not entries: @@ -70,10 +93,23 @@ class NRKBaseIE(InfoExtractor): }] if not entries: - if data.get('usageRights', {}).get('isGeoBlocked'): - raise ExtractorError( - 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', - expected=True) + message_type = data.get('messageType', '') + # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* + if 'IsGeoBlocked' in message_type and not self._faked_ip: + self.report_warning( + 'Video is geo restricted, trying to fake IP') + self._fake_ip() + return self._real_extract(url) + + MESSAGES = { + 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', + 'ProgramRightsHasExpired': 'Programmet har gått ut', + 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', + } + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, MESSAGES.get( + message_type, message_type)), + expected=True) conviva = data.get('convivaStatistics') or {} series = conviva.get('seriesName') or data.get('seriesTitle') diff --git a/youtube_dl/extractor/openload.py 
b/youtube_dl/extractor/openload.py index d3d4101de..7f19b1ba5 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals, division +import re + from .common import InfoExtractor from ..compat import ( compat_chr, @@ -10,6 +12,10 @@ from ..utils import ( determine_ext, ExtractorError, ) +from ..jsinterp import ( + JSInterpreter, + _NAME_RE +) class OpenloadIE(InfoExtractor): @@ -56,6 +62,44 @@ class OpenloadIE(InfoExtractor): 'only_matching': True, }] + def openload_decode(self, txt): + symbol_dict = { + '(゚Д゚) [゚Θ゚]': '_', + '(゚Д゚) [゚ω゚ノ]': 'a', + '(゚Д゚) [゚Θ゚ノ]': 'b', + '(゚Д゚) [\'c\']': 'c', + '(゚Д゚) [゚ー゚ノ]': 'd', + '(゚Д゚) [゚Д゚ノ]': 'e', + '(゚Д゚) [1]': 'f', + '(゚Д゚) [\'o\']': 'o', + '(o゚ー゚o)': 'u', + '(゚Д゚) [\'c\']': 'c', + '((゚ー゚) + (o^_^o))': '7', + '((o^_^o) +(o^_^o) +(c^_^o))': '6', + '((゚ー゚) + (゚Θ゚))': '5', + '(-~3)': '4', + '(-~-~1)': '3', + '(-~1)': '2', + '(-~0)': '1', + '((c^_^o)-(c^_^o))': '0', + } + delim = '(゚Д゚)[゚ε゚]+' + end_token = '(゚Д゚)[゚o゚]' + symbols = '|'.join(map(re.escape, symbol_dict.keys())) + txt = re.sub('(%s)\+\s?' 
% symbols, lambda m: symbol_dict[m.group(1)], txt) + ret = '' + for aacode in re.findall(r'{0}\+\s?{1}(.*?){0}'.format(re.escape(end_token), re.escape(delim)), txt): + for aachar in aacode.split(delim): + if aachar.isdigit(): + ret += compat_chr(int(aachar, 8)) + else: + m = re.match(r'^u([\da-f]{4})$', aachar) + if m: + ret += compat_chr(int(m.group(1), 16)) + else: + self.report_warning("Cannot decode: %s" % aachar) + return ret + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://openload.co/embed/%s/' % video_id, video_id) @@ -70,19 +114,26 @@ class OpenloadIE(InfoExtractor): r'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"', webpage, 'encrypted data') - magic = compat_ord(enc_data[-1]) + enc_code = self._html_search_regex(r'<script[^>]+>(゚ω゚[^<]+)</script>', + webpage, 'encrypted code') + + js_code = self.openload_decode(enc_code) + jsi = JSInterpreter(js_code) + + m_offset_fun = self._search_regex(r'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript offset function') + m_diff_fun = self._search_regex(r'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE, js_code, 'javascript diff function') + + offset = jsi.call_function(m_offset_fun) + diff = jsi.call_function(m_diff_fun) + video_url_chars = [] for idx, c in enumerate(enc_data): j = compat_ord(c) - if j == magic: - j -= 1 - elif j == magic - 1: - j += 1 if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 - if idx == len(enc_data) - 1: - j += 3 + if idx == len(enc_data) - offset: + j += diff video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py index c3c38cf4a..ddfc6f148 100644 --- a/youtube_dl/extractor/plays.py +++ b/youtube_dl/extractor/plays.py @@ -8,30 +8,31 @@ from ..utils import int_or_none class PlaysTVIE(InfoExtractor): - _VALID_URL = 
r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})' - _TEST = { - 'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', + _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})' + _TESTS = [{ + 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', 'md5': 'dfeac1198506652b5257a62762cec7bc', 'info_dict': { 'id': '56af17f56c95335490', 'ext': 'mp4', - 'title': 'When you outplay the Azir wall', + 'title': 'Bjergsen - When you outplay the Azir wall', 'description': 'Posted by Bjergsen', } - } + }, { + 'url': 'https://plays.tv/embeds/56af17f56c95335490', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage( + 'https://plays.tv/video/%s' % video_id, video_id) + + info = self._search_json_ld(webpage, video_id,) - title = self._og_search_title(webpage) - content = self._parse_json( - self._search_regex( - r'R\.bindContent\(({.+?})\);', webpage, - 'content'), video_id)['content'] mpd_url, sources = re.search( r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', - content).groups() + webpage).groups() formats = self._extract_mpd_formats( self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources): @@ -42,10 +43,11 @@ class PlaysTVIE(InfoExtractor): }) self._sort_formats(formats) - return { + info.update({ 'id': video_id, - 'title': title, 'description': self._og_search_description(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage), 'formats': formats, - } + }) + + return info diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py index 1c54af002..80091b85f 100644 --- a/youtube_dl/extractor/puls4.py +++ b/youtube_dl/extractor/puls4.py @@ -10,7 +10,7 @@ from 
..utils import ( class Puls4IE(ProSiebenSat1BaseIE): - _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>(?:[^/]+/)*?videos/[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)' _TESTS = [{ 'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', @@ -22,6 +22,12 @@ class Puls4IE(ProSiebenSat1BaseIE): 'upload_date': '20160830', 'uploader': 'PULS_4', }, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer', + 'only_matching': True, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598', + 'only_matching': True, }] _TOKEN = 'puls4' _SALT = '01!kaNgaiNgah1Ie4AeSha' diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py index 2fce4e81b..6db3e3e93 100644 --- a/youtube_dl/extractor/ruutu.py +++ b/youtube_dl/extractor/ruutu.py @@ -5,6 +5,7 @@ from .common import InfoExtractor from ..compat import compat_urllib_parse_urlparse from ..utils import ( determine_ext, + ExtractorError, int_or_none, xpath_attr, xpath_text, @@ -101,6 +102,11 @@ class RuutuIE(InfoExtractor): }) extract_formats(video_xml.find('./Clip')) + + drm = xpath_text(video_xml, './Clip/DRM', default=None) + if not formats and drm: + raise ExtractorError('This video is DRM protected.', expected=True) + self._sort_formats(formats) return { diff --git a/youtube_dl/extractor/screenwavemedia.py b/youtube_dl/extractor/screenwavemedia.py deleted file mode 100644 index 7d77e8825..000000000 --- a/youtube_dl/extractor/screenwavemedia.py +++ /dev/null @@ -1,146 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - int_or_none, - unified_strdate, - js_to_json, -) - - -class ScreenwaveMediaIE(InfoExtractor): - 
_VALID_URL = r'(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=(?P<id>[A-Za-z0-9-]+)' - EMBED_PATTERN = r'src=(["\'])(?P<url>(?:https?:)?//player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?.*\bid=.+?)\1' - _TESTS = [{ - 'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - playerdata = self._download_webpage( - 'http://player.screenwavemedia.com/player.php?id=%s' % video_id, - video_id, 'Downloading player webpage') - - vidtitle = self._search_regex( - r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/') - - playerconfig = self._download_webpage( - 'http://player.screenwavemedia.com/player.js', - video_id, 'Downloading playerconfig webpage') - - videoserver = self._search_regex(r'SWMServer\s*=\s*"([\d\.]+)"', playerdata, 'videoserver') - - sources = self._parse_json( - js_to_json( - re.sub( - r'(?s)/\*.*?\*/', '', - self._search_regex( - r'sources\s*:\s*(\[[^\]]+?\])', playerconfig, - 'sources', - ).replace( - "' + thisObj.options.videoserver + '", - videoserver - ).replace( - "' + playerVidId + '", - video_id - ) - ) - ), - video_id, fatal=False - ) - - # Fallback to hardcoded sources if JS changes again - if not sources: - self.report_warning('Falling back to a hardcoded list of streams') - sources = [{ - 'file': 'http://%s/vod/%s_%s.mp4' % (videoserver, video_id, format_id), - 'type': 'mp4', - 'label': format_label, - } for format_id, format_label in ( - ('low', '144p Low'), ('med', '160p Med'), ('high', '360p High'), ('hd1', '720p HD1'))] - sources.append({ - 'file': 'http://%s/vod/smil:%s.smil/playlist.m3u8' % (videoserver, video_id), - 'type': 'hls', - }) - - formats = [] - for source in sources: - file_ = source.get('file') - if not file_: - continue - if source.get('type') == 'hls': - 
formats.extend(self._extract_m3u8_formats(file_, video_id, ext='mp4')) - else: - format_id = self._search_regex( - r'_(.+?)\.[^.]+$', file_, 'format id', default=None) - if not self._is_valid_url(file_, video_id, format_id or 'video'): - continue - format_label = source.get('label') - height = int_or_none(self._search_regex( - r'^(\d+)[pP]', format_label, 'height', default=None)) - formats.append({ - 'url': file_, - 'format_id': format_id, - 'format': format_label, - 'ext': source.get('type'), - 'height': height, - }) - self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) - - return { - 'id': video_id, - 'title': vidtitle, - 'formats': formats, - } - - -class TeamFourIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?' - _TEST = { - 'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/', - 'info_dict': { - 'id': 'TeamFourStar-5292a02f20bfa', - 'ext': 'mp4', - 'upload_date': '20130401', - 'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar', - 'title': 'A Moment With TFS Episode 4', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - playerdata_url = self._search_regex( - r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', - webpage, 'player data URL') - - video_title = self._html_search_regex( - r'<div class="heroheadingtitle">(?P<title>.+?)</div>', - webpage, 'title') - video_date = unified_strdate(self._html_search_regex( - r'<div class="heroheadingdate">(?P<date>.+?)</div>', - webpage, 'date', fatal=False)) - video_description = self._html_search_regex( - r'(?s)<div class="postcontent">(?P<description>.+?)</div>', - 
webpage, 'description', fatal=False) - video_thumbnail = self._og_search_thumbnail(webpage) - - return { - '_type': 'url_transparent', - 'display_id': display_id, - 'title': video_title, - 'description': video_description, - 'upload_date': video_date, - 'thumbnail': video_thumbnail, - 'url': playerdata_url, - } diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 3b7ecb3c3..5a201eaa8 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -121,7 +121,7 @@ class SoundcloudIE(InfoExtractor): }, ] - _CLIENT_ID = '02gUJC0hH2ct1EGOcYXQIzRFU91c72Ea' + _CLIENT_ID = 'fDoItMDbsbZz8dY16ZzARCZmzgHBPotA' _IPHONE_CLIENT_ID = '376f225bf427445fc4bfb6b99b72e0bf' @staticmethod diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py index 218785ee4..abfee3ece 100644 --- a/youtube_dl/extractor/spike.py +++ b/youtube_dl/extractor/spike.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .mtv import MTVServicesInfoExtractor @@ -16,6 +18,15 @@ class SpikeIE(MTVServicesInfoExtractor): 'timestamp': 1388120400, 'upload_date': '20131227', }, + }, { + 'url': 'http://www.spike.com/full-episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-209', + 'md5': 'b25c6f16418aefb9ad5a6cae2559321f', + 'info_dict': { + 'id': '37ace3a8-1df6-48be-85b8-38df8229e241', + 'ext': 'mp4', + 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. 
Jim Rash|Act 1', + 'description': 'md5:a739ca8f978a7802f67f8016d27ce114', + }, }, { 'url': 'http://www.spike.com/video-clips/lhtu8m/', 'only_matching': True, @@ -32,3 +43,12 @@ class SpikeIE(MTVServicesInfoExtractor): _FEED_URL = 'http://www.spike.com/feeds/mrss/' _MOBILE_TEMPLATE = 'http://m.spike.com/videos/video.rbml?id=%s' + _CUSTOM_URL_REGEX = re.compile(r'spikenetworkapp://([^/]+/[-a-fA-F0-9]+)') + + def _extract_mgid(self, webpage): + mgid = super(SpikeIE, self)._extract_mgid(webpage, default=None) + if mgid is None: + url_parts = self._search_regex(self._CUSTOM_URL_REGEX, webpage, 'episode_id') + video_type, episode_id = url_parts.split('/', 1) + mgid = 'mgid:arc:{0}:spike.com:{1}'.format(video_type, episode_id) + return mgid diff --git a/youtube_dl/extractor/teamfourstar.py b/youtube_dl/extractor/teamfourstar.py new file mode 100644 index 000000000..a8c6ed7be --- /dev/null +++ b/youtube_dl/extractor/teamfourstar.py @@ -0,0 +1,48 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .jwplatform import JWPlatformIE +from ..utils import unified_strdate + + +class TeamFourStarIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/(?P<id>[a-z0-9\-]+)' + _TEST = { + 'url': 'http://teamfourstar.com/tfs-abridged-parody-episode-1-2/', + 'info_dict': { + 'id': '0WdZO31W', + 'title': 'TFS Abridged Parody Episode 1', + 'description': 'md5:d60bc389588ebab2ee7ad432bda953ae', + 'ext': 'mp4', + 'timestamp': 1394168400, + 'upload_date': '20080508', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + jwplatform_url = JWPlatformIE._extract_url(webpage) + + video_title = self._html_search_regex( + r'<h1[^>]+class="entry-title"[^>]*>(?P<title>.+?)</h1>', + webpage, 'title') + video_date = unified_strdate(self._html_search_regex( + r'<span[^>]+class="meta-date date updated"[^>]*>(?P<date>.+?)</span>', + webpage, 'date', 
fatal=False)) + video_description = self._html_search_regex( + r'(?s)<div[^>]+class="content-inner"[^>]*>.*?(?P<description><p>.+?)</div>', + webpage, 'description', fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'title': video_title, + 'description': video_description, + 'upload_date': video_date, + 'thumbnail': video_thumbnail, + 'url': jwplatform_url, + } diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dl/extractor/telebruxelles.py index eefecc490..5886e9c1b 100644 --- a/youtube_dl/extractor/telebruxelles.py +++ b/youtube_dl/extractor/telebruxelles.py @@ -7,33 +7,30 @@ from .common import InfoExtractor class TeleBruxellesIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt)/?(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?(?:telebruxelles|bx1)\.be/(news|sport|dernier-jt|emission)/?(?P<id>[^/#?]+)' _TESTS = [{ - 'url': 'http://www.telebruxelles.be/news/auditions-devant-parlement-francken-galant-tres-attendus/', - 'md5': '59439e568c9ee42fb77588b2096b214f', + 'url': 'http://bx1.be/news/que-risque-lauteur-dune-fausse-alerte-a-la-bombe/', + 'md5': 'a2a67a5b1c3e8c9d33109b902f474fd9', 'info_dict': { - 'id': '11942', - 'display_id': 'auditions-devant-parlement-francken-galant-tres-attendus', - 'ext': 'flv', - 'title': 'Parlement : Francken et Galant répondent aux interpellations de l’opposition', - 'description': 're:Les auditions des ministres se poursuivent*' - }, - 'params': { - 'skip_download': 'requires rtmpdump' + 'id': '158856', + 'display_id': 'que-risque-lauteur-dune-fausse-alerte-a-la-bombe', + 'ext': 'mp4', + 'title': 'Que risque l’auteur d’une fausse alerte à la bombe ?', + 'description': 'md5:3cf8df235d44ebc5426373050840e466', }, }, { - 'url': 'http://www.telebruxelles.be/sport/basket-brussels-bat-mons-80-74/', - 'md5': '181d3fbdcf20b909309e5aef5c6c6047', + 'url': 
'http://bx1.be/sport/futsal-schaerbeek-sincline-5-3-a-thulin/', + 'md5': 'dfe07ecc9c153ceba8582ac912687675', 'info_dict': { - 'id': '10091', - 'display_id': 'basket-brussels-bat-mons-80-74', - 'ext': 'flv', - 'title': 'Basket : le Brussels bat Mons 80-74', - 'description': 're:^Ils l\u2019on fait ! En basket, le B*', - }, - 'params': { - 'skip_download': 'requires rtmpdump' + 'id': '158433', + 'display_id': 'futsal-schaerbeek-sincline-5-3-a-thulin', + 'ext': 'mp4', + 'title': 'Futsal : Schaerbeek s’incline 5-3 à Thulin', + 'description': 'md5:fd013f1488d5e2dceb9cebe39e2d569b', }, + }, { + 'url': 'http://bx1.be/emission/bxenf1-gastronomie/', + 'only_matching': True, }] def _real_extract(self, url): @@ -50,13 +47,13 @@ class TeleBruxellesIE(InfoExtractor): r'file\s*:\s*"(rtmp://[^/]+/vod/mp4:"\s*\+\s*"[^"]+"\s*\+\s*".mp4)"', webpage, 'RTMP url') rtmp_url = re.sub(r'"\s*\+\s*"', '', rtmp_url) + formats = self._extract_wowza_formats(rtmp_url, article_id or display_id) + self._sort_formats(formats) return { 'id': article_id or display_id, 'display_id': display_id, 'title': title, 'description': description, - 'url': rtmp_url, - 'ext': 'flv', - 'rtmp_live': True # if rtmpdump is not called with "--live" argument, the download is blocked and can be completed + 'formats': formats, } diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py index 7629f0d10..197258df1 100644 --- a/youtube_dl/extractor/thisoldhouse.py +++ b/youtube_dl/extractor/thisoldhouse.py @@ -5,10 +5,10 @@ from .common import InfoExtractor class ThisOldHouseIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench', - 'md5': '568acf9ca25a639f0c4ff905826b662f', + 'md5': '946f05bbaa12a33f9ae35580d2dfcfe3', 'info_dict': { 'id': '2REGtUDQ', 'ext': 
'mp4', @@ -20,6 +20,9 @@ class ThisOldHouseIE(InfoExtractor): }, { 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins', 'only_matching': True, + }, { + 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 573f2ff6b..26d770992 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -56,7 +56,7 @@ class TouTvIE(InfoExtractor): 'state': state, }) login_form = self._search_regex( - r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form') + r'(?s)(<form[^>]+(?:id|name)="Form-login".+?</form>)', login_webpage, 'login form') form_data = self._hidden_inputs(login_form) form_data.update({ 'login-email': email, diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py new file mode 100644 index 000000000..1086176a2 --- /dev/null +++ b/youtube_dl/extractor/tvanouvelles.py @@ -0,0 +1,65 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveNewIE + + +class TVANouvellesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/videos/(?P<id>\d+)' + _TEST = { + 'url': 'http://www.tvanouvelles.ca/videos/5117035533001', + 'info_dict': { + 'id': '5117035533001', + 'ext': 'mp4', + 'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber: explications', + 'description': 'md5:479653b7c8cf115747bf5118066bd8b3', + 'uploader_id': '1741764581', + 'timestamp': 1473352030, + 'upload_date': '20160908', + }, + 'add_ie': ['BrightcoveNew'], + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + brightcove_id = self._match_id(url) + return self.url_result( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + 
BrightcoveNewIE.ie_key(), brightcove_id) + + +class TVANouvellesArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tvanouvelles\.ca/(?:[^/]+/)+(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://www.tvanouvelles.ca/2016/11/17/des-policiers-qui-ont-la-meche-un-peu-courte', + 'info_dict': { + 'id': 'des-policiers-qui-ont-la-meche-un-peu-courte', + 'title': 'Des policiers qui ont «la mèche un peu courte»?', + 'description': 'md5:92d363c8eb0f0f030de9a4a84a90a3a0', + }, + 'playlist_mincount': 4, + } + + @classmethod + def suitable(cls, url): + return False if TVANouvellesIE.suitable(url) else super(TVANouvellesArticleIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [ + self.url_result( + 'http://www.tvanouvelles.ca/videos/%s' % mobj.group('id'), + ie=TVANouvellesIE.ie_key(), video_id=mobj.group('id')) + for mobj in re.finditer( + r'data-video-id=(["\'])?(?P<id>\d+)', webpage)] + + title = self._og_search_title(webpage, fatal=False) + description = self._og_search_description(webpage) + + return self.playlist_result(entries, display_id, title, description) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 3411fcf7e..ac0b221b4 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -25,7 +25,7 @@ class TwitterBaseIE(InfoExtractor): class TwitterCardIE(TwitterBaseIE): IE_NAME = 'twitter:card' - _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos/tweet)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)' _TESTS = [ { 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889', @@ -84,6 +84,9 @@ class TwitterCardIE(TwitterBaseIE): 'title': 'Twitter web player', 'thumbnail': 're:^https?://.*\.jpg', }, + }, { + 'url': 'https://twitter.com/i/videos/752274308186120192', + 'only_matching': True, }, ] diff --git 
a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 783efda7d..d82261e5e 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -51,7 +51,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Hurts', 'genre': 'Pop', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'v3 SMIL format', 'url': 'http://www.vevo.com/watch/cassadee-pope/i-wish-i-could-break-your-heart/USUV71302923', @@ -67,7 +67,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Cassadee Pope', 'genre': 'Country', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'Age-limited video', 'url': 'https://www.vevo.com/watch/justin-timberlake/tunnel-vision-explicit/USRV81300282', @@ -83,7 +83,7 @@ class VevoIE(VevoBaseIE): 'artist': 'Justin Timberlake', 'genre': 'Pop', }, - 'expected_warnings': ['Unable to download SMIL file'], + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'No video_info', 'url': 'http://www.vevo.com/watch/k-camp-1/Till-I-Die/USUV71503000', @@ -91,15 +91,33 @@ class VevoIE(VevoBaseIE): 'info_dict': { 'id': 'USUV71503000', 'ext': 'mp4', - 'title': 'K Camp - Till I Die', + 'title': 'K Camp ft. T.I. - Till I Die', 'age_limit': 18, 'timestamp': 1449468000, 'upload_date': '20151207', 'uploader': 'K Camp', 'track': 'Till I Die', 'artist': 'K Camp', - 'genre': 'Rap/Hip-Hop', + 'genre': 'Hip-Hop', }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], + }, { + 'note': 'Featured test', + 'url': 'https://www.vevo.com/watch/lemaitre/Wait/USUV71402190', + 'md5': 'd28675e5e8805035d949dc5cf161071d', + 'info_dict': { + 'id': 'USUV71402190', + 'ext': 'mp4', + 'title': 'Lemaitre ft. 
LoLo - Wait', + 'age_limit': 0, + 'timestamp': 1413432000, + 'upload_date': '20141016', + 'uploader': 'Lemaitre', + 'track': 'Wait', + 'artist': 'Lemaitre', + 'genre': 'Electronic', + }, + 'expected_warnings': ['Unable to download SMIL file', 'Unable to download info'], }, { 'note': 'Only available via webpage', 'url': 'http://www.vevo.com/watch/GBUV71600656', @@ -242,8 +260,11 @@ class VevoIE(VevoBaseIE): timestamp = parse_iso8601(video_info.get('releaseDate')) artists = video_info.get('artists') - if artists: - artist = uploader = artists[0]['name'] + for curr_artist in artists: + if curr_artist.get('role') == 'Featured': + featured_artist = curr_artist['name'] + else: + artist = uploader = curr_artist['name'] view_count = int_or_none(video_info.get('views', {}).get('total')) for video_version in video_versions: diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py index 4351ac457..9c48701c1 100644 --- a/youtube_dl/extractor/viki.py +++ b/youtube_dl/extractor/viki.py @@ -1,11 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals -import json -import time -import hmac import hashlib +import hmac import itertools +import json +import re +import time from .common import InfoExtractor from ..utils import ( @@ -276,10 +277,14 @@ class VikiIE(VikiBaseIE): height = int_or_none(self._search_regex( r'^(\d+)[pP]$', format_id, 'height', default=None)) for protocol, format_dict in stream_dict.items(): + # rtmps URLs does not seem to work + if protocol == 'rtmps': + continue + format_url = format_dict['url'] if format_id == 'm3u8': m3u8_formats = self._extract_m3u8_formats( - format_dict['url'], video_id, 'mp4', - entry_protocol='m3u8_native', preference=-1, + format_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='m3u8-%s' % protocol, fatal=False) # Despite CODECS metadata in m3u8 all video-only formats # are actually video+audio @@ -287,9 +292,23 @@ class VikiIE(VikiBaseIE): if f.get('acodec') == 'none' and f.get('vcodec') != 
'none': f['acodec'] = None formats.extend(m3u8_formats) + elif format_url.startswith('rtmp'): + mobj = re.search( + r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', + format_url) + if not mobj: + continue + formats.append({ + 'format_id': 'rtmp-%s' % format_id, + 'ext': 'flv', + 'url': mobj.group('url'), + 'play_path': mobj.group('playpath'), + 'app': mobj.group('app'), + 'page_url': url, + }) else: formats.append({ - 'url': format_dict['url'], + 'url': format_url, 'format_id': '%s-%s' % (format_id, protocol), 'height': height, }) diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py index 8d671cca7..acf9fda48 100644 --- a/youtube_dl/extractor/vlive.py +++ b/youtube_dl/extractor/vlive.py @@ -17,7 +17,7 @@ from ..compat import compat_urllib_parse_urlencode class VLiveIE(InfoExtractor): IE_NAME = 'vlive' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.vlive.tv/video/1326', 'md5': 'cc7314812855ce56de70a06a27314983', 'info_dict': { @@ -27,7 +27,20 @@ class VLiveIE(InfoExtractor): 'creator': "Girl's Day", 'view_count': int, }, - } + }, { + 'url': 'http://www.vlive.tv/video/16937', + 'info_dict': { + 'id': '16937', + 'ext': 'mp4', + 'title': '[V LIVE] 첸백시 걍방', + 'creator': 'EXO', + 'view_count': int, + 'subtitles': 'mincount:12', + }, + 'params': { + 'skip_download': True, + }, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -116,7 +129,7 @@ class VLiveIE(InfoExtractor): subtitles = {} for caption in playinfo.get('captions', {}).get('list', []): - lang = dict_get(caption, ('language', 'locale', 'country', 'label')) + lang = dict_get(caption, ('locale', 'language', 'country', 'label')) if lang and caption.get('source'): subtitles[lang] = [{ 'ext': 'vtt', diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dl/extractor/webcaster.py new file mode 100644 index 000000000..7486cb347 --- /dev/null +++ b/youtube_dl/extractor/webcaster.py @@ -0,0 +1,102 
@@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + xpath_text, +) + + +class WebcasterIE(InfoExtractor): + _VALID_URL = r'https?://bl\.webcaster\.pro/(?:quote|media)/start/free_(?P<id>[^/]+)' + _TESTS = [{ + # http://video.khl.ru/quotes/393859 + 'url': 'http://bl.webcaster.pro/quote/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104?sr%3D105%26fa%3D1%26type_id%3D18', + 'md5': '0c162f67443f30916ff1c89425dcd4cd', + 'info_dict': { + 'id': 'c8cefd240aa593681c8d068cff59f407_hd', + 'ext': 'mp4', + 'title': 'Сибирь - Нефтехимик. Лучшие моменты первого периода', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, { + 'url': 'http://bl.webcaster.pro/media/start/free_6246c7a4453ac4c42b4398f840d13100_hd/2_2991109016/e8d0d82587ef435480118f9f9c41db41/4635726126', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_xml(url, video_id) + + title = xpath_text(video, './/event_name', 'event name', fatal=True) + + def make_id(parts, separator): + return separator.join(filter(None, parts)) + + formats = [] + for format_id in (None, 'noise'): + track_tag = make_id(('track', format_id), '_') + for track in video.findall('.//iphone/%s' % track_tag): + track_url = track.text + if not track_url: + continue + if determine_ext(track_url) == 'm3u8': + m3u8_formats = self._extract_m3u8_formats( + track_url, video_id, 'mp4', + entry_protocol='m3u8_native', + m3u8_id=make_id(('hls', format_id), '-'), fatal=False) + for f in m3u8_formats: + f.update({ + 'source_preference': 0 if format_id == 'noise' else 1, + 'format_note': track.get('title'), + }) + formats.extend(m3u8_formats) + self._sort_formats(formats) + + thumbnail = xpath_text(video, './/image', 'thumbnail') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } + + +class 
WebcasterFeedIE(InfoExtractor): + _VALID_URL = r'https?://bl\.webcaster\.pro/feed/start/free_(?P<id>[^/]+)' + _TEST = { + 'url': 'http://bl.webcaster.pro/feed/start/free_c8cefd240aa593681c8d068cff59f407_hd/q393859/eb173f99dd5f558674dae55f4ba6806d/1480289104', + 'only_matching': True, + } + + @staticmethod + def _extract_url(ie, webpage): + mobj = re.search( + r'<(?:object|a[^>]+class=["\']webcaster-player["\'])[^>]+data(?:-config)?=(["\']).*?config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_.*?)(?:[?&]|\1)', + webpage) + if mobj: + return mobj.group('url') + for secure in (True, False): + video_url = ie._og_search_video_url( + webpage, secure=secure, default=None) + if video_url: + mobj = re.search( + r'config=(?P<url>https?://bl\.webcaster\.pro/feed/start/free_[^?&=]+)', + video_url) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + + feed = self._download_xml(url, video_id) + + video_url = xpath_text( + feed, ('video_hd', 'video'), 'video url', fatal=True) + + return self.url_result(video_url, WebcasterIE.ie_key()) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 545246bcd..bd24a2838 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1796,7 +1796,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): | ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}) )""" - _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' + _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true' _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?' 
IE_NAME = 'youtube:playlist' _TESTS = [{ @@ -2175,7 +2175,7 @@ class YoutubeUserIE(YoutubeChannelIE): class YoutubeLiveIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com live streams' - _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+))/live' + _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live' IE_NAME = 'youtube:live' _TESTS = [{ @@ -2204,6 +2204,9 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/TheYoungTurks/live', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 9737f7002..a8df4aef0 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -198,12 +198,12 @@ class JSInterpreter(object): return opfunc(x, y) m = re.match( - r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr) + r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) if m: fname = m.group('func') argvals = tuple([ int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')]) + for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple() if fname not in self._functions: self._functions[fname] = self.extract_function(fname) return self._functions[fname](argvals) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py index 104807242..0f5d7bdb2 100644 --- a/youtube_dl/socks.py +++ b/youtube_dl/socks.py @@ -55,12 +55,12 @@ class Socks5AddressType(object): ATYP_IPV6 = 0x04 -class ProxyError(IOError): +class ProxyError(socket.error): ERR_SUCCESS = 0x00 def __init__(self, code=None, msg=None): if code is not None and msg is None: - msg = self.CODES.get(code) and 'unknown error' + msg = self.CODES.get(code) or 'unknown error' super(ProxyError, self).__init__(code, msg) @@ -103,6 +103,7 @@ class ProxyType(object): SOCKS4A = 1 SOCKS5 = 2 + 
Proxy = collections.namedtuple('Proxy', ( 'type', 'host', 'port', 'username', 'password', 'remote_dns')) @@ -122,7 +123,7 @@ class sockssocket(socket.socket): while len(data) < cnt: cur = self.recv(cnt - len(data)) if not cur: - raise IOError('{0} bytes missing'.format(cnt - len(data))) + raise EOFError('{0} bytes missing'.format(cnt - len(data))) data += cur return data diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 7cf490aa4..0c7158575 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -115,6 +115,8 @@ def _u30(reader): res = _read_int(reader) assert res & 0xf0000000 == 0 return res + + _u32 = _read_int @@ -176,6 +178,7 @@ class _Undefined(object): return 'undefined' __repr__ = __str__ + undefined = _Undefined() diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 69df88c6e..1acb630af 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.11.08.1' +__version__ = '2016.12.01'