From 65cceef8f4d0fc4679f98331adb933bd028c3670 Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Wed, 26 Jun 2013 11:28:47 +0200 Subject: [PATCH 01/73] Added support for additional vtt subtitle format (WebVTT) in youtube-dl. --- README.md | 2 +- test/test_youtube_subtitles.py | 10 +++++++++- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/__init__.py | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7d190249d..e43332724 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ which means you can modify it, redistribute it or use it however you like. video (currently youtube only) --list-subs lists all available subtitles for the video (currently youtube only) - --sub-format FORMAT subtitle format [srt/sbv] (default=srt) + --sub-format FORMAT subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only) --sub-lang LANG language of the subtitles to download (optional) use IETF language tags like 'en' diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index dad15de37..5814e13d4 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -84,7 +84,7 @@ class TestYoutubeSubtitles(unittest.TestCase): info_dict = IE.extract('QRS8MkLhQmM') subtitles = info_dict[0]['subtitles'] self.assertEqual(len(subtitles), 13) - def test_youtube_subtitles_format(self): + def test_youtube_subtitles_sbv_format(self): DL = FakeYDL() DL.params['writesubtitles'] = True DL.params['subtitlesformat'] = 'sbv' @@ -92,6 +92,14 @@ class TestYoutubeSubtitles(unittest.TestCase): info_dict = IE.extract('QRS8MkLhQmM') sub = info_dict[0]['subtitles'][0] self.assertEqual(md5(sub[2]), '13aeaa0c245a8bed9a451cb643e3ad8b') + def test_youtube_subtitles_vtt_format(self): + DL = FakeYDL() + DL.params['writesubtitles'] = True + DL.params['subtitlesformat'] = 'vtt' + IE = YoutubeIE(DL) + info_dict = IE.extract('QRS8MkLhQmM') + sub = info_dict[0]['subtitles'][0] + self.assertEqual(md5(sub[2]), '356cdc577fde0c6783b9b822e7206ff7') def test_youtube_list_subtitles(self): DL = FakeYDL() DL.params['listsubtitles'] = True diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e9a24a95a..4abcbbeac 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -75,7 +75,7 @@ class YoutubeDL(object): writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video listsubtitles: Lists all available subtitles for the video - subtitlesformat: Subtitle format [sbv/srt] (default=srt) + subtitlesformat: Subtitle format [srt/sbt/vtt] (default=srt) subtitleslang: Language of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 42abb8358..6a8fc5e96 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -205,7 +205,7 @@ def parseOpts(overrideArguments=None): help='lists all available subtitles for the video (currently youtube only)', default=False) video_format.add_option('--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', - help='subtitle format [srt/sbv] (default=srt) (currently youtube only)', default='srt') + help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt') video_format.add_option('--sub-lang', '--srt-lang', action='store', dest='subtitleslang', metavar='LANG', help='language of the subtitles to download (optional) use IETF language tags like \'en\'') From 0ca45b233f97f9c2a7faa8fa0f8ff83b93e9fe5a Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Wed, 26 Jun 2013 11:34:38 +0200 Subject: [PATCH 02/73] Added missing write-auto-sub option in README file --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index e43332724..d63c5bbe7 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,7 @@ which means you can modify it, redistribute it or use it however you like. -F, --list-formats list all available formats (currently youtube only) --write-sub write subtitle file (currently youtube only) + --write-auto-sub write automatic subtitle file (currently youtube only) --only-sub [deprecated] alias of --skip-download --all-subs downloads all the available subtitles of the video (currently youtube only) From b98a6b2f723de93061b58c062f9834d634b9c754 Mon Sep 17 00:00:00 2001 From: Ismael Mejia Date: Wed, 26 Jun 2013 11:59:29 +0200 Subject: [PATCH 03/73] Fixed typo in subtitle format option (from: sbt => sbv) --- youtube_dl/YoutubeDL.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 4abcbbeac..9931c98e9 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -75,7 +75,7 @@ class YoutubeDL(object): writeautomaticsub: Write the automatic subtitles to a file allsubtitles: Downloads all the subtitles of the video listsubtitles: Lists all available subtitles for the video - subtitlesformat: Subtitle format [srt/sbt/vtt] (default=srt) + subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt) subtitleslang: Language of the subtitles to download keepvideo: Keep the video file after post-processing daterange: A DateRange object, download only if the upload_date is in the range. From 405ec05cb2a1bb1ce27353a831924c17f57b86f4 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 15:25:53 +0500 Subject: [PATCH 04/73] added an IE for wimp.com --- youtube_dl/extractor/__init__.py | 2 ++ youtube_dl/extractor/wimp.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 youtube_dl/extractor/wimp.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0ea990860..82927610a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -49,6 +49,7 @@ from .vbox7 import Vbox7IE from .vevo import VevoIE from .vimeo import VimeoIE from .vine import VineIE +from .wimp import WimpIE from .worldstarhiphop import WorldStarHipHopIE from .xhamster import XHamsterIE from .xnxx import XNXXIE @@ -132,6 +133,7 @@ def gen_extractors(): VevoIE(), JukeboxIE(), TudouIE(), + WimpIE(), GenericIE() ] diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py new file mode 100644 index 000000000..9d52c947e --- /dev/null +++ b/youtube_dl/extractor/wimp.py @@ -0,0 +1,25 @@ +import re +import base64 +from .common import InfoExtractor + + +class WimpIE(InfoExtractor): + _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group(1) + webpage = self._download_webpage(url, video_id) + title = re.search('\',webpage).group(1) + thumbnail_url = re.search('\',webpage).group(1) + googleString = re.search("googleCode = '(.*?)'", webpage) + googleString = base64.b64decode(googleString.group(1)) + final_url = re.search('","(.*?)"', googleString).group(1) + ext = final_url.split('.')[-1] + return [{ + 'id': video_id, + 'url': final_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail_url, + }] From 6b4642fae32eea550d84333b0631c4afb0f3e8c5 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 15:40:24 +0500 Subject: [PATCH 05/73] added test for wimp.com --- test/tests.json | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/tests.json b/test/tests.json index 5f4f642e8..a70ddf34a 100644 --- a/test/tests.json +++ b/test/tests.json @@ -695,5 +695,14 @@ "info_dict": { "title": "卡马乔国足开大脚长传冲吊集锦" } + }, + { + "name": "Wimp", + "url": "http://www.wimp.com/deerfence/", + "file": "deerfence.flv", + "md5": "8b215e2e0168c6081a1cf84b2846a2b5", + "info_dict": { + "title": "Watch Till End - Herd of deer jump over a fence.", + } } ] From 8bcc355972020086672b0a3d8dcc2f38694f4672 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 15:51:25 +0500 Subject: [PATCH 06/73] removed trailing ',' and corrected the title in test --- test/tests.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/tests.json b/test/tests.json index a70ddf34a..816fabf70 100644 --- a/test/tests.json +++ b/test/tests.json @@ -702,7 +702,7 @@ "file": "deerfence.flv", "md5": "8b215e2e0168c6081a1cf84b2846a2b5", "info_dict": { - "title": "Watch Till End - Herd of deer jump over a fence.", + "title": "Watch Till End: Herd of deer jump over a fence." } } ] From 5abeaf06506b35e4c0db315e847ce32843742fe2 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 17:26:59 +0500 Subject: [PATCH 07/73] changed wimp.py according to the changes suggested by jaime --- youtube_dl/extractor/wimp.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 9d52c947e..811b37cc1 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -10,11 +10,11 @@ class WimpIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group(1) webpage = self._download_webpage(url, video_id) - title = re.search('\',webpage).group(1) - thumbnail_url = re.search('\',webpage).group(1) - googleString = re.search("googleCode = '(.*?)'", webpage) - googleString = base64.b64decode(googleString.group(1)) - final_url = re.search('","(.*?)"', googleString).group(1) + title = self._search_regex('\',webpage, 'video title') + thumbnail_url = self._search_regex('\',webpage,'video thumbnail') + googleString = self._search_regex("googleCode = '(.*?)'", webpage,'file url') + googleString = base64.b64decode(googleString) + final_url = self._search_regex('","(.*?)"', googleString,'final video url') ext = final_url.split('.')[-1] return [{ 'id': video_id, @@ -23,3 +23,4 @@ class WimpIE(InfoExtractor): 'title': title, 'thumbnail': thumbnail_url, }] + From f64e7695a174b597d62a7cd6211d69b5b0f0d0a0 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 18:46:05 +0500 Subject: [PATCH 08/73] added b'' to my regex expression in order to solve the error on python 3 --- youtube_dl/extractor/wimp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 811b37cc1..9ff5112a6 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -14,7 +14,7 @@ class WimpIE(InfoExtractor): thumbnail_url = self._search_regex('\',webpage,'video thumbnail') googleString = self._search_regex("googleCode = '(.*?)'", webpage,'file url') googleString = base64.b64decode(googleString) - final_url = self._search_regex('","(.*?)"', googleString,'final video url') + final_url = self._search_regex(b'","(.*?)"', googleString,'final video url') ext = final_url.split('.')[-1] return [{ 'id': video_id, From 2e3252801219dc82c379ba00a30c629039b06a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Jun 2013 16:32:47 +0200 Subject: [PATCH 09/73] FileDownloader: fixed call to "report_error" of YoutubeDL It was being called as "error" --- youtube_dl/FileDownloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 445f3e85e..155895fe2 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -137,7 +137,7 @@ class FileDownloader(object): self.ydl.report_warning(*args, **kargs) def report_error(self, *args, **kargs): - self.ydl.error(*args, **kargs) + self.ydl.report_error(*args, **kargs) def slow_down(self, start_time, byte_counter): """Sleep if the download speed is over the rate limit.""" From b1dfdc51b1062f8e6c5a3270ec04fbf18cd5a867 Mon Sep 17 00:00:00 2001 From: "M.Yasoob Khalid" Date: Wed, 26 Jun 2013 19:41:55 +0500 Subject: [PATCH 10/73] added .decode('ascii') --- youtube_dl/extractor/wimp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 9ff5112a6..a548e0fa0 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -13,8 +13,8 @@ class WimpIE(InfoExtractor): title = self._search_regex('\',webpage, 'video title') thumbnail_url = self._search_regex('\',webpage,'video thumbnail') googleString = self._search_regex("googleCode = '(.*?)'", webpage,'file url') - googleString = base64.b64decode(googleString) - final_url = self._search_regex(b'","(.*?)"', googleString,'final video url') + googleString = base64.b64decode(googleString).decode('ascii') + final_url = self._search_regex('","(.*?)"', googleString,'final video url') ext = final_url.split('.')[-1] return [{ 'id': video_id, From aa0c87391c7b84cde2fa8e307ffe5329e8ed3e5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Jun 2013 17:55:54 +0200 Subject: [PATCH 11/73] Add CSpanIE (closes #312) --- test/tests.json | 10 ++++++++ youtube_dl/extractor/__init__.py | 2 ++ youtube_dl/extractor/cspan.py | 44 ++++++++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) create mode 100644 youtube_dl/extractor/cspan.py diff --git a/test/tests.json b/test/tests.json index 5f4f642e8..aa540792e 100644 --- a/test/tests.json +++ b/test/tests.json @@ -695,5 +695,15 @@ "info_dict": { "title": "卡马乔国足开大脚长传冲吊集锦" } + }, + { + "name": "CSpan", + "url": "http://www.c-spanvideo.org/program/HolderonV", + "file": "315139.flv", + "md5": "74a623266956f69e4df0068ab6c80fe4", + "info_dict": { + "title": "Attorney General Eric Holder on Voting Rights Act Decision" + }, + "skip": "Requires rtmpdump" } ] diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 0ea990860..eaa213609 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -6,6 +6,7 @@ from .bliptv import BlipTVIE, BlipTVUserIE from .breakcom import BreakIE from .collegehumor import CollegeHumorIE from .comedycentral import ComedyCentralIE +from .cspan import CSpanIE from .dailymotion import DailymotionIE from .depositfiles import DepositFilesIE from .eighttracks import EightTracksIE @@ -132,6 +133,7 @@ def gen_extractors(): VevoIE(), JukeboxIE(), TudouIE(), + CSpanIE(), GenericIE() ] diff --git a/youtube_dl/extractor/cspan.py b/youtube_dl/extractor/cspan.py new file mode 100644 index 000000000..2246515f2 --- /dev/null +++ b/youtube_dl/extractor/cspan.py @@ -0,0 +1,44 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_parse, +) + +class CSpanIE(InfoExtractor): + _VALID_URL = r'http://www.c-spanvideo.org/program/(.*)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + prog_name = mobj.group(1) + webpage = self._download_webpage(url, prog_name) + video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id') + data = compat_urllib_parse.urlencode({'programid': video_id, + 'dynamic':'1'}) + info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data + video_info = self._download_webpage(info_url, video_id, u'Downloading video info') + + self.report_extraction(video_id) + + title = self._html_search_regex(r'(.*?)', + video_info, 'title') + description = self._html_search_regex(r'(.*?)', + video_info, 'video url') + url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443') + path = self._search_regex(r'(.*?)', + video_info, 'rtmp play path') + + return {'id': video_id, + 'title': title, + 'ext': 'flv', + 'url': url, + 'play_path': path, + 'description': description, + 'thumbnail': thumbnail, + } From 9b0756f8f26f4c1744073cb2ac4bbe4608ca32ba Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 26 Jun 2013 18:04:47 +0200 Subject: [PATCH 12/73] [vevo] remove unused import --- youtube_dl/extractor/vevo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index aa88e1a92..49a249ae3 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -3,7 +3,6 @@ import json from .common import InfoExtractor from ..utils import ( - unified_strdate, ExtractorError, ) From bcd606c0feb565b260e0231a655c6fc16e439698 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 26 Jun 2013 21:32:51 +0200 Subject: [PATCH 13/73] ComedycentralIE: Force conversion of the description to unicode (close #941) When writing to a file it would fail. --- youtube_dl/extractor/comedycentral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 1bb359046..6985e88f0 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -172,7 +172,7 @@ class ComedyCentralIE(InfoExtractor): 'ext': 'mp4', 'format': format, 'thumbnail': None, - 'description': officialTitle, + 'description': compat_str(officialTitle), } results.append(info) From 112da0a0ced71d56db8a1f02a43197b78ca0a851 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 27 Jun 2013 00:09:05 +0200 Subject: [PATCH 14/73] Simplify FakeYDL --- test/helper.py | 33 +++++++++++++++++++++++++++++++++ test/test_youtube_lists.py | 24 +----------------------- test/test_youtube_subtitles.py | 26 +------------------------- 3 files changed, 35 insertions(+), 48 deletions(-) create mode 100644 test/helper.py diff --git a/test/helper.py b/test/helper.py new file mode 100644 index 000000000..842ffc2fd --- /dev/null +++ b/test/helper.py @@ -0,0 +1,33 @@ +import io +import json +import os.path + +from youtube_dl import YoutubeDL, YoutubeDLHandler +from youtube_dl.utils import ( + compat_cookiejar, + compat_urllib_request, +) + +# General configuration (from __init__, not very elegant...) +jar = compat_cookiejar.CookieJar() +cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) +proxy_handler = compat_urllib_request.ProxyHandler() +opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) +compat_urllib_request.install_opener(opener) + +PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") +with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + parameters = json.load(pf) + +class FakeYDL(YoutubeDL): + def __init__(self): + self.result = [] + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. + self.params = dict(parameters) + def to_screen(self, s): + print(s) + def trouble(self, s, tb=None): + raise Exception(s) + def download(self, x): + self.result.append(x) \ No newline at end of file diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 320b44082..4486b7eb0 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,30 +10,8 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE, YoutubeChannelIE from youtube_dl.utils import * -from youtube_dl import YoutubeDL -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) - -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) - -class FakeYDL(YoutubeDL): - def __init__(self): - self.result = [] - self.params = parameters - def to_screen(self, s): - print(s) - def trouble(self, s, tb=None): - raise Exception(s) - def extract_info(self, url): - self.result.append(url) - return url +from helper import FakeYDL class TestYoutubeLists(unittest.TestCase): def assertIsPlaylist(self,info): diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 5814e13d4..86e09c9b1 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -12,31 +12,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from youtube_dl.extractor import YoutubeIE from youtube_dl.utils import * -from youtube_dl import YoutubeDL - -PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") -with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: - parameters = json.load(pf) - -# General configuration (from __init__, not very elegant...) -jar = compat_cookiejar.CookieJar() -cookie_processor = compat_urllib_request.HTTPCookieProcessor(jar) -proxy_handler = compat_urllib_request.ProxyHandler() -opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler()) -compat_urllib_request.install_opener(opener) - -class FakeYDL(YoutubeDL): - def __init__(self): - self.result = [] - # Different instances of the downloader can't share the same dictionary - # some test set the "sublang" parameter, which would break the md5 checks. - self.params = dict(parameters) - def to_screen(self, s): - print(s) - def trouble(self, s, tb=None): - raise Exception(s) - def download(self, x): - self.result.append(x) +from helper import FakeYDL md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() From 5746f9da99bdb2439da152027212d49147133575 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Thu, 27 Jun 2013 00:09:25 +0200 Subject: [PATCH 15/73] Add test for youtube signature algorithm --- test/test_youtube_sig.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100755 test/test_youtube_sig.py diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py new file mode 100755 index 000000000..ddd27a9dc --- /dev/null +++ b/test/test_youtube_sig.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +import unittest +import sys + +# Allow direct execution +import os +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.extractor.youtube import YoutubeIE +from helper import FakeYDL + +sig = YoutubeIE(FakeYDL())._decrypt_signature + +class TestYoutubeSig(unittest.TestCase): + def test_43_43(self): + wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135' + right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE' + self.assertEqual(sig(wrong), right) + +if __name__ == '__main__': + unittest.main() From 75c9481224ab435aa8c38403644a0e8b46b76db0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 27 Jun 2013 00:09:51 +0200 Subject: [PATCH 16/73] ArteTvIE: rewrite the extract process to support the new site (fixes #875) The video can be downloaded with rtmp or http, but the best quality format seems to always use rtmp. Deleted the old methods. --- youtube_dl/extractor/arte.py | 124 +++++++++++------------------------ 1 file changed, 37 insertions(+), 87 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 82e3ffe04..b061b9566 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,53 +1,21 @@ import re -import socket +import json from .common import InfoExtractor from ..utils import ( - compat_http_client, - compat_str, - compat_urllib_error, + # This is used by the not implemented extractLiveStream method compat_urllib_parse, - compat_urllib_request, ExtractorError, unified_strdate, ) class ArteTvIE(InfoExtractor): - """arte.tv information extractor.""" - - _VALID_URL = r'(?:http://)?videos\.arte\.tv/(?:fr|de)/videos/.*' + _VALID_URL = r'(?:http://)?www\.arte.tv/guide/(?:fr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' - def fetch_webpage(self, url): - request = compat_urllib_request.Request(url) - try: - self.report_download_webpage(url) - webpage = compat_urllib_request.urlopen(request).read() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to retrieve video webpage: %s' % compat_str(err)) - except ValueError as err: - raise ExtractorError(u'Invalid URL: %s' % url) - return webpage - - def grep_webpage(self, url, regex, regexFlags, matchTuples): - page = self.fetch_webpage(url) - mobj = re.search(regex, page, regexFlags) - info = {} - - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) - - for (i, key, err) in matchTuples: - if mobj.group(i) is None: - raise ExtractorError(err) - else: - info[key] = mobj.group(i) - - return info - # TODO implement Live Stream # def extractLiveStream(self, url): # video_lang = url.split('/')[-4] @@ -75,62 +43,44 @@ class ArteTvIE(InfoExtractor): # ) # video_url = u'%s/%s' % (info.get('url'), info.get('path')) - def extractPlus7Stream(self, url): - video_lang = url.split('/')[-3] - info = self.grep_webpage( - url, - r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)', - 0, - [ - (1, 'url', u'Invalid URL: %s' % url) - ] - ) - next_url = compat_urllib_parse.unquote(info.get('url')) - info = self.grep_webpage( - next_url, - r'