diff --git a/.travis.yml b/.travis.yml index 7f1fa8a3c..45b71f11b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ notifications: - filippo.valsorda@gmail.com - phihag@phihag.de - jaime.marquinez.ferrandiz+travis@gmail.com + - yasoob.khld@gmail.com # irc: # channels: # - "irc.freenode.org#youtube-dl" diff --git a/README.md b/README.md index b246d3c53..560bcdca1 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,9 @@ which means you can modify it, redistribute it or use it however you like. # OPTIONS -h, --help print this help text and exit --version print program version and exit - -U, --update update this program to latest version + -U, --update update this program to latest version. Make sure + that you have sufficient permissions (run with + sudo if needed) -i, --ignore-errors continue on download errors --dump-user-agent display the current browser identification --user-agent UA specify a custom user agent diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py index c3d69e6f4..22977ccd9 100644 --- a/devscripts/youtube_genalgo.py +++ b/devscripts/youtube_genalgo.py @@ -5,27 +5,39 @@ import sys tests = [ + # 92 - vflQw-fB4 2013/07/17 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"", + "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"), + # 90 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`", + "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"), # 88 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<", "J:|}][{=+-_)(*&;%$#@>MNBVCXZASDFGH^KLPOIUYTREWQ0987654321mnbvcxzasdfghrklpoiuytej"), - # 87 + # 87 - vflART1Nf 2013/07/24 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<", - "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), + "tyuioplkjhgfdsazxcv"), # 86 - vfl_ymO4Z 2013/06/27 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<", "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"), - # 85 + # 85 - vflSAFCP9 2013/07/19 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<", - "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr"), + "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c"), # 84 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<", "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWe098765432rmnbvcxzasdfghjklpoiuyt1"), - # 83 + # 83 - vflcaqGO8 2013/07/11 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<", - "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS.<"), # 82 ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<", "Q>/?;}[{=+-(*<^%$#@!MNBVCXZASDFGHKLPOIUY8REWT0q&7654321mnbvcxzasdfghjklpoiuytrew9"), + # 81 - vflLC8JvQ 2013/07/25 + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.", + "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), + # 79 - vflLC8JvQ 2013/07/25 (sporadic) + ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/", + "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"), ] def find_matching(wrong, right): diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 39a5ee33a..c73d0e467 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -61,6 +61,17 @@ class TestAllURLsMatching(unittest.TestCase): else: self.assertFalse(ie.suitable(url), '%s should not match URL %r' % (type(ie).__name__, url)) + def test_keywords(self): + ies = gen_extractors() + matching_ies = lambda url: [ie.IE_NAME for ie in ies + if ie.suitable(url) and ie.IE_NAME != 'generic'] + self.assertEqual(matching_ies(':ytsubs'), ['youtube:subscriptions']) + self.assertEqual(matching_ies(':ytsubscriptions'), ['youtube:subscriptions']) + self.assertEqual(matching_ies(':thedailyshow'), ['ComedyCentral']) + self.assertEqual(matching_ies(':tds'), ['ComedyCentral']) + self.assertEqual(matching_ies(':colbertreport'), ['ComedyCentral']) + self.assertEqual(matching_ies(':cr'), ['ComedyCentral']) + if __name__ == '__main__': unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index c4b71362e..be1069105 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -4,6 +4,7 @@ import sys import unittest +import xml.etree.ElementTree # Allow direct execution import os @@ -16,6 +17,7 @@ from youtube_dl.utils import unescapeHTML from youtube_dl.utils import orderedSet from youtube_dl.utils import DateRange from youtube_dl.utils import unified_strdate +from youtube_dl.utils import find_xpath_attr if sys.version_info < (3, 0): _compat_str = lambda b: b.decode('unicode-escape') @@ -112,5 +114,18 @@ class TestUtil(unittest.TestCase): self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214') self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011') + def test_find_xpath_attr(self): + testxml = u''' + + + + + ''' + doc = xml.etree.ElementTree.fromstring(testxml) + + self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) + self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) + self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_sig.py b/test/test_youtube_sig.py old mode 100755 new mode 100644 index e87b6259b..4d45a0e08 --- a/test/test_youtube_sig.py +++ b/test/test_youtube_sig.py @@ -13,9 +13,14 @@ from helper import FakeYDL sig = YoutubeIE(FakeYDL())._decrypt_signature class TestYoutubeSig(unittest.TestCase): - def test_43_43(self): - wrong = '5AEEAE0EC39677BC65FD9021CCD115F1F2DBD5A59E4.C0B243A3E2DED6769199AF3461781E75122AE135135' - right = '931EA22157E1871643FA9519676DED253A342B0C.4E95A5DBD2F1F511DCC1209DF56CB77693CE0EAE' + def test_92(self): + wrong = "F9F9B6E6FD47029957AB911A964CC20D95A181A5D37A2DBEFD67D403DB0E8BE4F4910053E4E8A79.0B70B.0B80B8" + right = "69B6E6FD47029957AB911A9F4CC20D95A181A5D3.A2DBEFD67D403DB0E8BE4F4910053E4E8A7980B7" + self.assertEqual(sig(wrong), right) + + def test_90(self): + wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`" + right = "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|" self.assertEqual(sig(wrong), right) def test_88(self): @@ -25,7 +30,7 @@ class TestYoutubeSig(unittest.TestCase): def test_87(self): wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<" - right = "!?;:|}][{=+-_)(*&^$#@/MNBVCXZASqFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr" + right = "tyuioplkjhgfdsazxcv" self.assertEqual(sig(wrong), right) def test_86(self): @@ -35,7 +40,7 @@ class TestYoutubeSig(unittest.TestCase): def test_85(self): wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<" - right = "{>/?;}[.=+-_)(*&^%$#@!MqBVCXZASDFwHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytr" + right = "ertyuiqplkjhgfdsazx$vbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#<%^&*()_-+={[};?/c" self.assertEqual(sig(wrong), right) def test_84(self): @@ -45,7 +50,7 @@ class TestYoutubeSig(unittest.TestCase): def test_83(self): wrong = "qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<" - right = "D.>/?;}[{=+_)(*&^%$#!MNBVCXeAS[^?/]+)(?:[?].*)?$' + _TEST = { + u"url": u"http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect", + u'file': u'XD300-23_68HighlightsAResearchCntAugHumanIntellect.ogv', + u'md5': u'8af1d4cf447933ed3c7f4871162602db', + u'info_dict': { + u"title": u"1968 Demo - FJCC Conference Presentation Reel #1", + u"description": u"Reel 1 of 3: Also known as the \"Mother of All Demos\", Doug Engelbart's presentation at the Fall Joint Computer Conference in San Francisco, December 9, 1968 titled \"A Research Center for Augmenting Human Intellect.\" For this presentation, Doug and his team astonished the audience by not only relating their research, but demonstrating it live. This was the debut of the mouse, interactive computing, hypermedia, computer supported software engineering, video teleconferencing, etc. See also Doug's 1968 Demo page for more background, highlights, links, and the detailed paper published in this conference proceedings. Filmed on 3 reels: Reel 1 | Reel 2 | Reel 3", + u"upload_date": u"19681210", + u"uploader": u"SRI International" + } + } + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + json_url = url + (u'?' if u'?' in url else '&') + u'output=json' + json_data = self._download_webpage(json_url, video_id) + data = json.loads(json_data) + + title = data['metadata']['title'][0] + description = data['metadata']['description'][0] + uploader = data['metadata']['creator'][0] + upload_date = unified_strdate(data['metadata']['date'][0]) + + formats = [{ + 'format': fdata['format'], + 'url': 'http://' + data['server'] + data['dir'] + fn, + 'file_size': int(fdata['size']), + } + for fn,fdata in data['files'].items() + if 'Video' in fdata['format']] + formats.sort(key=lambda fdata: fdata['file_size']) + + info = { + '_type': 'video', + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + 'upload_date': upload_date, + } + thumbnail = data.get('misc', {}).get('image') + if thumbnail: + info['thumbnail'] = thumbnail + + # TODO: Remove when #980 has been merged + info['url'] = formats[-1]['url'] + info['ext'] = determine_ext(formats[-1]['url']) + + return info \ No newline at end of file diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 5793a4129..dbf8eed99 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -32,7 +32,7 @@ class ARDIE(InfoExtractor): # determine title and media streams from webpage html = self._download_webpage(url, video_id) title = re.search(self._TITLE, html).group('title') - streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)] + streams = [mo.groupdict() for mo in re.finditer(self._MEDIA_STREAM, html)] if not streams: assert '"fsk"' in html raise ExtractorError(u'This video is only available after 8:00 pm') diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index a030a28bb..993e30f7a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -1,12 +1,11 @@ import re import json +import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( - # This is used by the not implemented extractLiveStream method - compat_urllib_parse, - ExtractorError, + find_xpath_attr, unified_strdate, ) @@ -17,7 +16,7 @@ class ArteTvIE(InfoExtractor): The videos expire in 7 days, so we can't add tests. """ _EMISSION_URL = r'(?:http://)?www\.arte.tv/guide/(?Pfr|de)/(?:(?:sendungen|emissions)/)?(?P.*?)/(?P.*?)(\?.*)?' - _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?:fr|de)/.*-(?P.*?).html' + _VIDEOS_URL = r'(?:http://)?videos.arte.tv/(?Pfr|de)/.*-(?P.*?).html' _LIVE_URL = r'index-[0-9]+\.html$' IE_NAME = u'arte.tv' @@ -27,6 +26,7 @@ class ArteTvIE(InfoExtractor): return any(re.match(regex, url) for regex in (cls._EMISSION_URL, cls._VIDEOS_URL)) # TODO implement Live Stream + # from ..utils import compat_urllib_parse # def extractLiveStream(self, url): # video_lang = url.split('/')[-4] # info = self.grep_webpage( @@ -56,7 +56,6 @@ class ArteTvIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._EMISSION_URL, url) if mobj is not None: - name = mobj.group('name') lang = mobj.group('lang') # This is not a real id, it can be for example AJT for the news # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal @@ -66,7 +65,8 @@ class ArteTvIE(InfoExtractor): mobj = re.match(self._VIDEOS_URL, url) if mobj is not None: id = mobj.group('id') - return self._extract_video(url, id) + lang = mobj.group('lang') + return self._extract_video(url, id, lang) if re.search(self._LIVE_URL, video_id) is not None: raise ExtractorError(u'Arte live streams are not yet supported, sorry') @@ -75,7 +75,8 @@ class ArteTvIE(InfoExtractor): def _extract_emission(self, url, video_id, lang): """Extract from www.arte.tv/guide""" - json_url = 'http://org-www.arte.tv/papi/tvguide/videos/stream/player/F/%s_PLUS7-F/ALL/ALL.json' % video_id + webpage = self._download_webpage(url, video_id) + json_url = self._html_search_regex(r'arte_vp_url="(.*?)"', webpage, 'json url') json_info = self._download_webpage(json_url, video_id, 'Downloading info json') self.report_extraction(video_id) @@ -113,13 +114,15 @@ class ArteTvIE(InfoExtractor): return info_dict - def _extract_video(self, url, video_id): + def _extract_video(self, url, video_id, lang): """Extract from videos.arte.tv""" - config_xml_url = url.replace('/videos/', '/do_delegate/videos/') - config_xml_url = config_xml_url.replace('.html', ',view,asPlayerXml.xml') - config_xml = self._download_webpage(config_xml_url, video_id) - config_xml_url = self._html_search_regex(r'