diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a26ff1de4..1fb878b59 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.01** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. +- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.10** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.05.01 +[debug] youtube-dl version 2016.05.10 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/.gitignore b/.gitignore index 72c10425d..d5f216b5f 100644 --- a/.gitignore +++ b/.gitignore @@ -31,7 +31,9 @@ updates_key.pem *.part *.swp test/testdata +test/local_parameters.json .tox youtube-dl.zsh .idea .idea/* +tmp/ diff --git a/.travis.yml b/.travis.yml index cc21fae8f..998995845 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,9 @@ python: - "3.4" - "3.5" sudo: false +install: + - bash ./devscripts/install_srelay.sh + - export PATH=$PATH:$(pwd)/tmp/srelay-0.4.8b6 script: nosetests test --verbose notifications: email: diff --git a/AUTHORS b/AUTHORS index bf860b7f7..5ca71ace7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -171,3 +171,4 @@ Philip Huppert blahgeek Kevin Deldycke inondle +Tomáš Čech diff --git a/Makefile b/Makefile index c9ce216d1..5d7cd5a7e 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ test: ot: offlinetest offlinetest: codetest - $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + $(PYTHON) -m nose --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py --exclude test_socks.py tar: youtube-dl.tar.gz diff --git a/README.md b/README.md index 50acb26a0..4ef6b6d5a 100644 --- a/README.md +++ b/README.md @@ -85,9 +85,11 @@ which means you can modify it, redistribute it or use it however you like. --no-color Do not emit color codes in output ## Network Options: - --proxy URL Use the specified HTTP/HTTPS proxy. Pass in - an empty string (--proxy "") for direct - connection + --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. + To enable experimental SOCKS proxy, specify + a proper scheme. For example + socks5://127.0.0.1:1080/. Pass in an empty + string (--proxy "") for direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds --source-address IP Client-side IP address to bind to (experimental) diff --git a/devscripts/install_srelay.sh b/devscripts/install_srelay.sh new file mode 100755 index 000000000..33ce8a3f7 --- /dev/null +++ b/devscripts/install_srelay.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +mkdir -p tmp && cd tmp +wget -N http://downloads.sourceforge.net/project/socks-relay/socks-relay/srelay-0.4.8/srelay-0.4.8b6.tar.gz +tar zxvf srelay-0.4.8b6.tar.gz +cd srelay-0.4.8b6 +./configure +make diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 9fb43671f..de84e5c84 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -77,6 +77,7 @@ - **Bild**: Bild.de - **BiliBili** - **BioBioChileTV** + - **BIQLE** - **BleacherReport** - **BleacherReportCMS** - **blinkx** @@ -145,6 +146,7 @@ - **culturebox.francetvinfo.fr** - **CultureUnplugged** - **CWTV** + - **DailyMail** - **dailymotion** - **dailymotion:playlist** - **dailymotion:user** @@ -325,6 +327,7 @@ - **limelight** - **limelight:channel** - **limelight:channel_list** + - **LiTV** - **LiveLeak** - **livestream** - **livestream:original** @@ -374,6 +377,8 @@ - **mtvservices:embedded** - **MuenchenTV**: münchen.tv - **MusicPlayOn** + - **mva**: Microsoft Virtual Academy videos + - **mva:course**: Microsoft Virtual Academy courses - **Mwave** - **MwaveMeetGreet** - **MySpace** @@ -463,7 +468,8 @@ - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) - **pcmag** - **People** - - **Periscope**: Periscope + - **periscope**: Periscope + - **periscope:user**: Periscope user videos - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** @@ -700,6 +706,7 @@ - **Vessel** - **Vesti**: Вести.Ru - **Vevo** + - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **Vice** @@ -772,7 +779,7 @@ - **WSJ**: Wall Street Journal - **XBef** - **XboxClips** - - **XFileShare**: XFileShare based sites: GorillaVid.in, daclips.in, movpod.in, fastvideo.in, realvid.net, filehoot.com and vidto.me + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To - **XHamster** - **XHamsterEmbed** - **xiami:album**: 虾米音乐 - 专辑 diff --git a/test/helper.py b/test/helper.py index b8e22c5cb..dfee217a9 100644 --- a/test/helper.py +++ b/test/helper.py @@ -24,8 +24,13 @@ from youtube_dl.utils import ( def get_params(override=None): PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") + LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "local_parameters.json") with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: parameters = json.load(pf) + if os.path.exists(LOCAL_PARAMETERS_FILE): + with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + parameters.update(json.load(pf)) if override: parameters.update(override) return parameters diff --git a/test/test_compat.py b/test/test_compat.py index 618668210..539b30540 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -10,13 +10,14 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.utils import get_filesystem_encoding from youtube_dl.compat import ( compat_getenv, + compat_setenv, compat_etree_fromstring, compat_expanduser, compat_shlex_split, compat_str, + compat_struct_unpack, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, @@ -26,19 +27,22 @@ from youtube_dl.compat import ( class TestCompat(unittest.TestCase): def test_compat_getenv(self): test_str = 'тест' - os.environ['YOUTUBE-DL-TEST'] = ( - test_str if sys.version_info >= (3, 0) - else test_str.encode(get_filesystem_encoding())) + compat_setenv('YOUTUBE-DL-TEST', test_str) self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) + def test_compat_setenv(self): + test_var = 'YOUTUBE-DL-TEST' + test_str = 'тест' + compat_setenv(test_var, test_str) + compat_getenv(test_var) + self.assertEqual(compat_getenv(test_var), test_str) + def test_compat_expanduser(self): old_home = os.environ.get('HOME') test_str = 'C:\Documents and Settings\тест\Application Data' - os.environ['HOME'] = ( - test_str if sys.version_info >= (3, 0) - else test_str.encode(get_filesystem_encoding())) + compat_setenv('HOME', test_str) self.assertEqual(compat_expanduser('~'), test_str) - os.environ['HOME'] = old_home + compat_setenv('HOME', old_home or '') def test_all_present(self): import youtube_dl.compat @@ -99,5 +103,9 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + def test_struct_unpack(self): + self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) + + if __name__ == '__main__': unittest.main() diff --git a/test/test_socks.py b/test/test_socks.py new file mode 100644 index 000000000..d07003ceb --- /dev/null +++ b/test/test_socks.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import random +import subprocess + +from test.helper import ( + FakeYDL, + get_params, +) +from youtube_dl.compat import ( + compat_str, + compat_urllib_request, +) + + +class TestMultipleSocks(unittest.TestCase): + @staticmethod + def _check_params(attrs): + params = get_params() + for attr in attrs: + if attr not in params: + print('Missing %s. Skipping.' % attr) + return + return params + + def test_proxy_http(self): + params = self._check_params(['primary_proxy', 'primary_server_ip']) + if params is None: + return + ydl = FakeYDL({ + 'proxy': params['primary_proxy'] + }) + self.assertEqual( + ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'), + params['primary_server_ip']) + + def test_proxy_https(self): + params = self._check_params(['primary_proxy', 'primary_server_ip']) + if params is None: + return + ydl = FakeYDL({ + 'proxy': params['primary_proxy'] + }) + self.assertEqual( + ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'), + params['primary_server_ip']) + + def test_secondary_proxy_http(self): + params = self._check_params(['secondary_proxy', 'secondary_server_ip']) + if params is None: + return + ydl = FakeYDL() + req = compat_urllib_request.Request('http://yt-dl.org/ip') + req.add_header('Ytdl-request-proxy', params['secondary_proxy']) + self.assertEqual( + ydl.urlopen(req).read().decode('utf-8'), + params['secondary_server_ip']) + + def test_secondary_proxy_https(self): + params = self._check_params(['secondary_proxy', 'secondary_server_ip']) + if params is None: + return + ydl = FakeYDL() + req = compat_urllib_request.Request('https://yt-dl.org/ip') + req.add_header('Ytdl-request-proxy', params['secondary_proxy']) + self.assertEqual( + ydl.urlopen(req).read().decode('utf-8'), + params['secondary_server_ip']) + + +class TestSocks(unittest.TestCase): + def setUp(self): + self.port = random.randint(20000, 30000) + self.server_process = subprocess.Popen([ + 'srelay', '-f', '-i', '127.0.0.1:%d' % self.port], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def tearDown(self): + self.server_process.terminate() + self.server_process.communicate() + + def _get_ip(self, protocol): + ydl = FakeYDL({ + 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), + }) + return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8') + + def test_socks4(self): + self.assertTrue(isinstance(self._get_ip('socks4'), compat_str)) + + def test_socks4a(self): + self.assertTrue(isinstance(self._get_ip('socks4a'), compat_str)) + + def test_socks5(self): + self.assertTrue(isinstance(self._get_ip('socks5'), compat_str)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 00ada95ec..5702ffa97 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -55,7 +55,6 @@ from youtube_dl.utils import ( smuggle_url, str_to_int, strip_jsonp, - struct_unpack, timeconvert, unescapeHTML, unified_strdate, @@ -457,9 +456,6 @@ class TestUtil(unittest.TestCase): testPL(5, 2, (2, 99), [2, 3, 4]) testPL(5, 2, (20, 99), []) - def test_struct_unpack(self): - self.assertEqual(struct_unpack('!B', b'\x00'), (0,)) - def test_read_batch_urls(self): f = io.StringIO('''\xef\xbb\xbf foo bar\r diff --git a/tox.ini b/tox.ini index 2d7134005..9c4e4a3d1 100644 --- a/tox.ini +++ b/tox.ini @@ -9,5 +9,6 @@ passenv = HOME defaultargs = test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py + --exclude test_socks.py commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html # test.test_download:TestDownload.test_NowVideo diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index cdfaa2658..ef0559d02 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -64,6 +64,7 @@ from .utils import ( PostProcessingError, preferredencoding, prepend_extension, + register_socks_protocols, render_table, replace_extension, SameFileError, @@ -361,6 +362,8 @@ class YoutubeDL(object): for ph in self.params.get('progress_hooks', []): self.add_progress_hook(ph) + register_socks_protocols() + def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 0b6c5ca7a..1392361a1 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -11,6 +11,7 @@ import re import shlex import shutil import socket +import struct import subprocess import sys import itertools @@ -340,9 +341,9 @@ except ImportError: # Python 2 return parsed_result try: - from shlex import quote as shlex_quote + from shlex import quote as compat_shlex_quote except ImportError: # Python < 3.3 - def shlex_quote(s): + def compat_shlex_quote(s): if re.match(r'^[-_\w./]+$', s): return s else: @@ -373,6 +374,9 @@ compat_os_name = os._name if os.name == 'java' else os.name if sys.version_info >= (3, 0): compat_getenv = os.getenv compat_expanduser = os.path.expanduser + + def compat_setenv(key, value, env=os.environ): + env[key] = value else: # Environment variables should be decoded with filesystem encoding. # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) @@ -384,6 +388,12 @@ else: env = env.decode(get_filesystem_encoding()) return env + def compat_setenv(key, value, env=os.environ): + def encode(v): + from .utils import get_filesystem_encoding + return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v + env[encode(key)] = encode(value) + # HACK: The default implementations of os.path.expanduser from cpython do not decode # environment variables with filesystem encoding. We will work around this by # providing adjusted implementations. @@ -456,18 +466,6 @@ else: print(s) -try: - subprocess_check_output = subprocess.check_output -except AttributeError: - def subprocess_check_output(*args, **kwargs): - assert 'input' not in kwargs - p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs) - output, _ = p.communicate() - ret = p.poll() - if ret: - raise subprocess.CalledProcessError(ret, p.args, output=output) - return output - if sys.version_info < (3, 0) and sys.platform == 'win32': def compat_getpass(prompt, *args, **kwargs): if isinstance(prompt, compat_str): @@ -583,6 +581,26 @@ if sys.version_info >= (3, 0): else: from tokenize import generate_tokens as compat_tokenize_tokenize + +try: + struct.pack('!I', 0) +except TypeError: + # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument + # See https://bugs.python.org/issue19099 + def compat_struct_pack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.pack(spec, *args) + + def compat_struct_unpack(spec, *args): + if isinstance(spec, compat_str): + spec = spec.encode('ascii') + return struct.unpack(spec, *args) +else: + compat_struct_pack = struct.pack + compat_struct_unpack = struct.unpack + + __all__ = [ 'compat_HTMLParser', 'compat_HTTPError', @@ -604,9 +622,13 @@ __all__ = [ 'compat_os_name', 'compat_parse_qs', 'compat_print', + 'compat_setenv', + 'compat_shlex_quote', 'compat_shlex_split', 'compat_socket_create_connection', 'compat_str', + 'compat_struct_pack', + 'compat_struct_unpack', 'compat_subprocess_get_DEVNULL', 'compat_tokenize_tokenize', 'compat_urllib_error', @@ -623,7 +645,5 @@ __all__ = [ 'compat_urlretrieve', 'compat_xml_parse_error', 'compat_xpath', - 'shlex_quote', - 'subprocess_check_output', 'workaround_optparse_bug9161', ] diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 45f49c350..3a73cee1c 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -6,6 +6,7 @@ import sys import re from .common import FileDownloader +from ..compat import compat_setenv from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( cli_option, @@ -198,6 +199,18 @@ class FFmpegFD(ExternalFD): '-headers', ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] + env = None + proxy = self.params.get('proxy') + if proxy: + if not re.match(r'^[\da-zA-Z]+://', proxy): + proxy = 'http://%s' % proxy + # Since December 2015 ffmpeg supports -http_proxy option (see + # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) + # We could switch to the following code if we are able to detect version properly + # args += ['-http_proxy', proxy] + env = os.environ.copy() + compat_setenv('HTTP_PROXY', proxy, env=env) + protocol = info_dict.get('protocol') if protocol == 'rtmp': @@ -239,7 +252,7 @@ class FFmpegFD(ExternalFD): self._debug_cmd(args) - proc = subprocess.Popen(args, stdin=subprocess.PIPE) + proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) try: retval = proc.wait() except KeyboardInterrupt: diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 664d87543..3d9337afa 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,13 +12,13 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, + compat_struct_pack, + compat_struct_unpack, ) from ..utils import ( encodeFilename, fix_xml_ampersands, sanitize_open, - struct_pack, - struct_unpack, xpath_text, ) @@ -31,13 +31,13 @@ class FlvReader(io.BytesIO): # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read(8))[0] def read_unsigned_int(self): - return struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', self.read(4))[0] def read_unsigned_char(self): - return struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read(1))[0] def read_string(self): res = b'' @@ -194,11 +194,11 @@ def build_fragments_list(boot_info): def write_unsigned_int(stream, val): - stream.write(struct_pack('!I', val)) + stream.write(compat_struct_pack('!I', val)) def write_unsigned_int_24(stream, val): - stream.write(struct_pack('!I', val)[1:]) + stream.write(compat_struct_pack('!I', val)[1:]) def write_flv_header(stream): diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index d7b34bde3..62136ee54 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -23,10 +23,15 @@ class HlsFD(FragmentFD): UNSUPPORTED_FEATURES = ( r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] - r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + # Live streams heuristic does not always work (e.g. geo restricted to Germany + # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) + # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # event media playlists [4] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 + # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 ) return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 6652c8e42..5a58d1777 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -33,19 +33,33 @@ class CeskaTelevizeIE(InfoExtractor): 'skip_download': True, }, }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/10532695142-prvni-republika/bonus/14716-zpevacka-z-duparny-bobina', + 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'info_dict': { - 'id': '61924494876844374', + 'id': '61924494877028507', 'ext': 'mp4', - 'title': 'První republika: Zpěvačka z Dupárny Bobina', - 'description': 'Sága mapující atmosféru první republiky od r. 1918 do r. 1945.', + 'title': 'Hyde Park Civilizace: Bonus 01 - En', + 'description': 'English Subtittles', 'thumbnail': 're:^https?://.*\.jpg', - 'duration': 88.4, + 'duration': 81.3, }, 'params': { # m3u8 download 'skip_download': True, }, + }, { + # live stream + 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'info_dict': { + 'id': 402, + 'ext': 'mp4', + 'title': 're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'is_live': True, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'Georestricted to Czech Republic', }, { # video with 18+ caution trailer 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', @@ -118,19 +132,21 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage) - playlist_description = self._og_search_description(webpage) + playlist_title = self._og_search_title(webpage, default=None) + playlist_description = self._og_search_description(webpage, default=None) playlist = self._download_json(req, playlist_id)['playlist'] playlist_len = len(playlist) entries = [] for item in playlist: + is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item['streamUrls'].items(): formats.extend(self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', - entry_protocol='m3u8_native', fatal=False)) + entry_protocol='m3u8' if is_live else 'm3u8_native', + fatal=False)) self._sort_formats(formats) item_id = item.get('id') or item['assetId'] @@ -145,14 +161,22 @@ class CeskaTelevizeIE(InfoExtractor): if subs: subtitles = self.extract_subtitles(episode_id, subs) + if playlist_len == 1: + final_title = playlist_title or title + if is_live: + final_title = self._live_title(final_title) + else: + final_title = '%s (%s)' % (playlist_title, title) + entries.append({ 'id': item_id, - 'title': playlist_title if playlist_len == 1 else '%s (%s)' % (playlist_title, title), + 'title': final_title, 'description': playlist_description if playlist_len == 1 else None, 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, 'subtitles': subtitles, + 'is_live': is_live, }) return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 79af47715..edd0d108e 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -6,6 +6,9 @@ import re import time from .common import InfoExtractor +from ..compat import ( + compat_struct_unpack, +) from ..utils import ( ExtractorError, float_or_none, @@ -13,7 +16,6 @@ from ..utils import ( remove_start, sanitized_Request, std_headers, - struct_unpack, ) @@ -21,7 +23,7 @@ def _decrypt_url(png): encrypted_data = base64.b64decode(png.encode('utf-8')) text_index = encrypted_data.find(b'tEXt') text_chunk = encrypted_data[text_index - 4:] - length = struct_unpack('!I', text_chunk[:4])[0] + length = compat_struct_unpack('!I', text_chunk[:4])[0] # Use bytearray to get integers when iterating in both python 2.x and 3.x data = bytearray(text_chunk[8:8 + length]) data = [chr(b) for b in data if b != 0] diff --git a/youtube_dl/options.py b/youtube_dl/options.py index d1f8d1331..38efd292d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -188,7 +188,10 @@ def parseOpts(overrideArguments=None): network.add_option( '--proxy', dest='proxy', default=None, metavar='URL', - help='Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection') + help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable experimental ' + 'SOCKS proxy, specify a proper scheme. For example ' + 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") ' + 'for direct connection') network.add_option( '--socket-timeout', dest='socket_timeout', type=float, default=None, metavar='SECONDS', diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py index 74f66d669..90630c2d7 100644 --- a/youtube_dl/postprocessor/execafterdownload.py +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import subprocess from .common import PostProcessor -from ..compat import shlex_quote +from ..compat import compat_shlex_quote from ..utils import PostProcessingError @@ -17,7 +17,7 @@ class ExecAfterDownloadPP(PostProcessor): if '{}' not in cmd: cmd += ' {}' - cmd = cmd.replace('{}', shlex_quote(information['filepath'])) + cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) self._downloader.to_screen('[exec] Executing command: %s' % cmd) retCode = subprocess.call(cmd, shell=True) diff --git a/youtube_dl/socks.py b/youtube_dl/socks.py new file mode 100644 index 000000000..fd49d7435 --- /dev/null +++ b/youtube_dl/socks.py @@ -0,0 +1,271 @@ +# Public Domain SOCKS proxy protocol implementation +# Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3 + +from __future__ import unicode_literals + +# References: +# SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol +# SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol +# SOCKS5 protocol https://tools.ietf.org/html/rfc1928 +# SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929 + +import collections +import socket + +from .compat import ( + compat_ord, + compat_struct_pack, + compat_struct_unpack, +) + +__author__ = 'Timo Schmid ' + +SOCKS4_VERSION = 4 +SOCKS4_REPLY_VERSION = 0x00 +# Excerpt from SOCKS4A protocol: +# if the client cannot resolve the destination host's domain name to find its +# IP address, it should set the first three bytes of DSTIP to NULL and the last +# byte to a non-zero value. +SOCKS4_DEFAULT_DSTIP = compat_struct_pack('!BBBB', 0, 0, 0, 0xFF) + +SOCKS5_VERSION = 5 +SOCKS5_USER_AUTH_VERSION = 0x01 +SOCKS5_USER_AUTH_SUCCESS = 0x00 + + +class Socks4Command(object): + CMD_CONNECT = 0x01 + CMD_BIND = 0x02 + + +class Socks5Command(Socks4Command): + CMD_UDP_ASSOCIATE = 0x03 + + +class Socks5Auth(object): + AUTH_NONE = 0x00 + AUTH_GSSAPI = 0x01 + AUTH_USER_PASS = 0x02 + AUTH_NO_ACCEPTABLE = 0xFF # For server response + + +class Socks5AddressType(object): + ATYP_IPV4 = 0x01 + ATYP_DOMAINNAME = 0x03 + ATYP_IPV6 = 0x04 + + +class ProxyError(IOError): + ERR_SUCCESS = 0x00 + + def __init__(self, code=None, msg=None): + if code is not None and msg is None: + msg = self.CODES.get(code) and 'unknown error' + super(ProxyError, self).__init__(code, msg) + + +class InvalidVersionError(ProxyError): + def __init__(self, expected_version, got_version): + msg = ('Invalid response version from server. Expected {0:02x} got ' + '{1:02x}'.format(expected_version, got_version)) + super(InvalidVersionError, self).__init__(0, msg) + + +class Socks4Error(ProxyError): + ERR_SUCCESS = 90 + + CODES = { + 91: 'request rejected or failed', + 92: 'request rejected becasue SOCKS server cannot connect to identd on the client', + 93: 'request rejected because the client program and identd report different user-ids' + } + + +class Socks5Error(ProxyError): + ERR_GENERAL_FAILURE = 0x01 + + CODES = { + 0x01: 'general SOCKS server failure', + 0x02: 'connection not allowed by ruleset', + 0x03: 'Network unreachable', + 0x04: 'Host unreachable', + 0x05: 'Connection refused', + 0x06: 'TTL expired', + 0x07: 'Command not supported', + 0x08: 'Address type not supported', + 0xFE: 'unknown username or invalid password', + 0xFF: 'all offered authentication methods were rejected' + } + + +class ProxyType(object): + SOCKS4 = 0 + SOCKS4A = 1 + SOCKS5 = 2 + +Proxy = collections.namedtuple('Proxy', ( + 'type', 'host', 'port', 'username', 'password', 'remote_dns')) + + +class sockssocket(socket.socket): + def __init__(self, *args, **kwargs): + self._proxy = None + super(sockssocket, self).__init__(*args, **kwargs) + + def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None): + assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5) + + self._proxy = Proxy(proxytype, addr, port, username, password, rdns) + + def recvall(self, cnt): + data = b'' + while len(data) < cnt: + cur = self.recv(cnt - len(data)) + if not cur: + raise IOError('{0} bytes missing'.format(cnt - len(data))) + data += cur + return data + + def _recv_bytes(self, cnt): + data = self.recvall(cnt) + return compat_struct_unpack('!{0}B'.format(cnt), data) + + @staticmethod + def _len_and_data(data): + return compat_struct_pack('!B', len(data)) + data + + def _check_response_version(self, expected_version, got_version): + if got_version != expected_version: + self.close() + raise InvalidVersionError(expected_version, got_version) + + def _resolve_address(self, destaddr, default, use_remote_dns): + try: + return socket.inet_aton(destaddr) + except socket.error: + if use_remote_dns and self._proxy.remote_dns: + return default + else: + return socket.inet_aton(socket.gethostbyname(destaddr)) + + def _setup_socks4(self, address, is_4a=False): + destaddr, port = address + + ipaddr = self._resolve_address(destaddr, SOCKS4_DEFAULT_DSTIP, use_remote_dns=is_4a) + + packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr + + username = (self._proxy.username or '').encode('utf-8') + packet += username + b'\x00' + + if is_4a and self._proxy.remote_dns: + packet += destaddr.encode('utf-8') + b'\x00' + + self.sendall(packet) + + version, resp_code, dstport, dsthost = compat_struct_unpack('!BBHI', self.recvall(8)) + + self._check_response_version(SOCKS4_REPLY_VERSION, version) + + if resp_code != Socks4Error.ERR_SUCCESS: + self.close() + raise Socks4Error(resp_code) + + return (dsthost, dstport) + + def _setup_socks4a(self, address): + self._setup_socks4(address, is_4a=True) + + def _socks5_auth(self): + packet = compat_struct_pack('!B', SOCKS5_VERSION) + + auth_methods = [Socks5Auth.AUTH_NONE] + if self._proxy.username and self._proxy.password: + auth_methods.append(Socks5Auth.AUTH_USER_PASS) + + packet += compat_struct_pack('!B', len(auth_methods)) + packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) + + self.sendall(packet) + + version, method = self._recv_bytes(2) + + self._check_response_version(SOCKS5_VERSION, version) + + if method == Socks5Auth.AUTH_NO_ACCEPTABLE: + self.close() + raise Socks5Error(method) + + if method == Socks5Auth.AUTH_USER_PASS: + username = self._proxy.username.encode('utf-8') + password = self._proxy.password.encode('utf-8') + packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION) + packet += self._len_and_data(username) + self._len_and_data(password) + self.sendall(packet) + + version, status = self._recv_bytes(2) + + self._check_response_version(SOCKS5_USER_AUTH_VERSION, version) + + if status != SOCKS5_USER_AUTH_SUCCESS: + self.close() + raise Socks5Error(Socks5Error.ERR_GENERAL_FAILURE) + + def _setup_socks5(self, address): + destaddr, port = address + + ipaddr = self._resolve_address(destaddr, None, use_remote_dns=True) + + self._socks5_auth() + + reserved = 0 + packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) + if ipaddr is None: + destaddr = destaddr.encode('utf-8') + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) + packet += self._len_and_data(destaddr) + else: + packet += compat_struct_pack('!B', Socks5AddressType.ATYP_IPV4) + ipaddr + packet += compat_struct_pack('!H', port) + + self.sendall(packet) + + version, status, reserved, atype = self._recv_bytes(4) + + self._check_response_version(SOCKS5_VERSION, version) + + if status != Socks5Error.ERR_SUCCESS: + self.close() + raise Socks5Error(status) + + if atype == Socks5AddressType.ATYP_IPV4: + destaddr = self.recvall(4) + elif atype == Socks5AddressType.ATYP_DOMAINNAME: + alen = compat_ord(self.recv(1)) + destaddr = self.recvall(alen) + elif atype == Socks5AddressType.ATYP_IPV6: + destaddr = self.recvall(16) + destport = compat_struct_unpack('!H', self.recvall(2))[0] + + return (destaddr, destport) + + def _make_proxy(self, connect_func, address): + if not self._proxy: + return connect_func(self, address) + + result = connect_func(self, (self._proxy.host, self._proxy.port)) + if result != 0 and result is not None: + return result + setup_funcs = { + ProxyType.SOCKS4: self._setup_socks4, + ProxyType.SOCKS4A: self._setup_socks4a, + ProxyType.SOCKS5: self._setup_socks5, + } + setup_funcs[self._proxy.type](address) + return result + + def connect(self, address): + self._make_proxy(socket.socket.connect, address) + + def connect_ex(self, address): + return self._make_proxy(socket.socket.connect_ex, address) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 06c1d6cc1..7cf490aa4 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -4,10 +4,12 @@ import collections import io import zlib -from .compat import compat_str +from .compat import ( + compat_str, + compat_struct_unpack, +) from .utils import ( ExtractorError, - struct_unpack, ) @@ -23,17 +25,17 @@ def _extract_tags(file_contents): file_contents[:1]) # Determine number of bits in framesize rectangle - framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3 + framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3 framesize_len = (5 + 4 * framesize_nbits + 7) // 8 pos = framesize_len + 2 + 2 while pos < len(content): - header16 = struct_unpack('> 6 tag_len = header16 & 0x3f if tag_len == 0x3f: - tag_len = struct_unpack('= 0x80) else b'\x00' - return struct_unpack(' 2.6 + self.sock = self._context.wrap_socket( + self.sock, server_hostname=self.host) + else: + self.sock = ssl.wrap_socket(self.sock) + + return SocksConnection + + class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def __init__(self, params, https_conn_class=None, *args, **kwargs): compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs) @@ -857,12 +915,20 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): def https_open(self, req): kwargs = {} + conn_class = self._https_conn_class + if hasattr(self, '_context'): # python > 2.6 kwargs['context'] = self._context if hasattr(self, '_check_hostname'): # python 3.x kwargs['check_hostname'] = self._check_hostname + + socks_proxy = req.headers.get('Ytdl-socks-proxy') + if socks_proxy: + conn_class = make_socks_conn_class(conn_class, socks_proxy) + del req.headers['Ytdl-socks-proxy'] + return self.do_open(functools.partial( - _create_http_connection, self, self._https_conn_class, True), + _create_http_connection, self, conn_class, True), req, **kwargs) @@ -1193,7 +1259,7 @@ def bytes_to_intlist(bs): def intlist_to_bytes(xs): if not xs: return b'' - return struct_pack('%dB' % len(xs), *xs) + return compat_struct_pack('%dB' % len(xs), *xs) # Cross-platform file locking @@ -1761,24 +1827,6 @@ def escape_url(url): fragment=escape_rfc3986(url_parsed.fragment) ).geturl() -try: - struct.pack('!I', 0) -except TypeError: - # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument - # See https://bugs.python.org/issue19099 - def struct_pack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.pack(spec, *args) - - def struct_unpack(spec, *args): - if isinstance(spec, compat_str): - spec = spec.encode('ascii') - return struct.unpack(spec, *args) -else: - struct_pack = struct.pack - struct_unpack = struct.unpack - def read_batch_urls(batch_fd): def fixup(url): @@ -1929,7 +1977,7 @@ def ytdl_is_updateable(): def args_to_str(args): # Get a short string representation for a subprocess command - return ' '.join(shlex_quote(a) for a in args) + return ' '.join(compat_shlex_quote(a) for a in args) def error_to_compat_str(err): @@ -2701,6 +2749,10 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler): if proxy == '__noproxy__': return None # No Proxy + if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): + req.add_header('Ytdl-socks-proxy', proxy) + # youtube-dl's http/https handlers do wrapping the socket with socks + return None return compat_urllib_request.ProxyHandler.proxy_open( self, req, proxy, type) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 551160897..45e40c0d1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.05.01' +__version__ = '2016.05.10'