diff --git a/MANIFEST.in b/MANIFEST.in index 81f8e05cd..8f8af7a7f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,5 @@ include README.md include test/*.py -include test/*.json \ No newline at end of file +include test/*.json +include youtube-dl.bash-completion +include youtube-dl.1 diff --git a/Makefile b/Makefile index 966a685e1..84ea70d2c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,10 @@ all: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion clean: - rm -rf youtube-dl youtube-dl.exe youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz + rm -rf youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz + +cleanall: clean + rm -f youtube-dl youtube-dl.exe PREFIX=/usr/local BINDIR=$(PREFIX)/bin @@ -23,7 +26,9 @@ test: tar: youtube-dl.tar.gz -.PHONY: all clean install test tar +.PHONY: all clean install test tar bash-completion pypi-files + +pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl: youtube_dl/*.py zip --quiet youtube-dl youtube_dl/*.py @@ -45,6 +50,8 @@ youtube-dl.1: README.md youtube-dl.bash-completion: youtube_dl/*.py devscripts/bash-completion.in python devscripts/bash-completion.py +bash-completion: youtube-dl.bash-completion + youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ --exclude '*.DS_Store' \ diff --git a/README.md b/README.md index a14dac9f4..7c09d0c0d 100644 --- a/README.md +++ b/README.md @@ -85,6 +85,7 @@ which means you can modify it, redistribute it or use it however you like. --get-description simulate, quiet but print video description --get-filename simulate, quiet but print output filename --get-format simulate, quiet but print output format + --newline output progress bar as new lines --no-progress do not print progress bar --console-title display progress in console titlebar -v, --verbose print various debugging information diff --git a/devscripts/release.sh b/devscripts/release.sh index a5f07fd61..ee650f221 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -20,19 +20,19 @@ if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already pre if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi -echo "\n### First of all, testing..." -make clean -nosetests --with-coverage --cover-package=youtube_dl --cover-html test || exit 1 +/bin/echo -e "\n### First of all, testing..." +make cleanall +nosetests --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1 -echo "\n### Changing version in version.py..." +/bin/echo -e "\n### Changing version in version.py..." sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py -echo "\n### Committing CHANGELOG README.md and youtube_dl/version.py..." +/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..." make README.md git add CHANGELOG README.md youtube_dl/version.py git commit -m "release $version" -echo "\n### Now tagging, signing and pushing..." +/bin/echo -e "\n### Now tagging, signing and pushing..." git tag -s -m "Release $version" "$version" git show "$version" read -p "Is it good, can I push? (y/n) " -n 1 @@ -42,7 +42,7 @@ MASTER=$(git rev-parse --abbrev-ref HEAD) git push origin $MASTER:master git push origin "$version" -echo "\n### OK, now it is time to build the binaries..." +/bin/echo -e "\n### OK, now it is time to build the binaries..." REV=$(git rev-parse HEAD) make youtube-dl youtube-dl.tar.gz wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \ @@ -57,11 +57,11 @@ RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz" (cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS) git checkout HEAD -- youtube-dl youtube-dl.exe -echo "\n### Signing and uploading the new binaries to youtube-dl.org..." +/bin/echo -e "\n### Signing and uploading the new binaries to youtube-dl.org..." for f in $RELEASE_FILES; do gpg --detach-sig "build/$version/$f"; done scp -r "build/$version" ytdl@youtube-dl.org:html/downloads/ -echo "\n### Now switching to gh-pages..." +/bin/echo -e "\n### Now switching to gh-pages..." git clone --branch gh-pages --single-branch . build/gh-pages ROOT=$(pwd) ( @@ -83,7 +83,9 @@ ROOT=$(pwd) ) rm -rf build +make pypi-files echo "Uploading to PyPi ..." python setup.py sdist upload +make clean -echo "\n### DONE!" +/bin/echo -e "\n### DONE!" diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 06de8e7b8..69717b3fc 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -11,12 +11,18 @@ from youtube_dl.InfoExtractors import YoutubeIE, YoutubePlaylistIE class TestAllURLsMatching(unittest.TestCase): def test_youtube_playlist_matching(self): - self.assertTrue(YoutubePlaylistIE().suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) - self.assertTrue(YoutubePlaylistIE().suitable(u'PL63F0C78739B09958')) - self.assertFalse(YoutubePlaylistIE().suitable(u'PLtS2H6bU1M')) + self.assertTrue(YoutubePlaylistIE.suitable(u'ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) + self.assertTrue(YoutubePlaylistIE.suitable(u'UUBABnxM4Ar9ten8Mdjj1j0Q')) #585 + self.assertTrue(YoutubePlaylistIE.suitable(u'PL63F0C78739B09958')) + self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')) + self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')) + self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')) + self.assertTrue(YoutubePlaylistIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 + self.assertFalse(YoutubePlaylistIE.suitable(u'PLtS2H6bU1M')) def test_youtube_matching(self): - self.assertTrue(YoutubeIE().suitable(u'PLtS2H6bU1M')) + self.assertTrue(YoutubeIE.suitable(u'PLtS2H6bU1M')) + self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668 def test_youtube_extract(self): self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 3044e0852..9c2e82ea3 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -8,7 +8,7 @@ import json import os sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from youtube_dl.InfoExtractors import YoutubeUserIE,YoutubePlaylistIE +from youtube_dl.InfoExtractors import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE from youtube_dl.utils import * PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json") @@ -35,39 +35,57 @@ class FakeDownloader(object): class TestYoutubeLists(unittest.TestCase): def test_youtube_playlist(self): - DL = FakeDownloader() - IE = YoutubePlaylistIE(DL) - IE.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') - self.assertEqual(DL.result, [ - ['http://www.youtube.com/watch?v=bV9L5Ht9LgY'], - ['http://www.youtube.com/watch?v=FXxLjLQi3Fg'], - ['http://www.youtube.com/watch?v=tU3Bgo5qJZE'] - ]) + dl = FakeDownloader() + ie = YoutubePlaylistIE(dl) + ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') + ytie_results = [YoutubeIE()._extract_id(r[0]) for r in dl.result] + self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE']) + + def test_issue_661(self): + dl = FakeDownloader() + ie = YoutubePlaylistIE(dl) + ie.extract('PLMCmkNmxw6Z9eduM7BZjSEh7HiU543Ig0') + self.assertTrue(len(dl.result) > 20) + + def test_issue_673(self): + dl = FakeDownloader() + ie = YoutubePlaylistIE(dl) + ie.extract('PLBB231211A4F62143') + self.assertTrue(len(dl.result) > 40) def test_youtube_playlist_long(self): - DL = FakeDownloader() - IE = YoutubePlaylistIE(DL) - IE.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - self.assertTrue(len(DL.result) >= 799) + dl = FakeDownloader() + ie = YoutubePlaylistIE(dl) + ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + self.assertTrue(len(dl.result) >= 799) + + def test_youtube_playlist_with_deleted(self): + #651 + dl = FakeDownloader() + ie = YoutubePlaylistIE(dl) + ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + ytie_results = [YoutubeIE()._extract_id(r[0]) for r in dl.result] + self.assertFalse('pElCt5oNDuI' in ytie_results) + self.assertFalse('KdPEApIVdWM' in ytie_results) def test_youtube_course(self): - DL = FakeDownloader() - IE = YoutubePlaylistIE(DL) + dl = FakeDownloader() + ie = YoutubePlaylistIE(dl) # TODO find a > 100 (paginating?) videos course - IE.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') - self.assertEqual(DL.result[0], ['http://www.youtube.com/watch?v=j9WZyLZCBzs']) - self.assertEqual(len(DL.result), 25) - self.assertEqual(DL.result[-1], ['http://www.youtube.com/watch?v=rYefUsYuEp0']) + ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + self.assertEqual(YoutubeIE()._extract_id(dl.result[0][0]), 'j9WZyLZCBzs') + self.assertEqual(len(dl.result), 25) + self.assertEqual(YoutubeIE()._extract_id(dl.result[-1][0]), 'rYefUsYuEp0') def test_youtube_channel(self): # I give up, please find a channel that does paginate and test this like test_youtube_playlist_long pass # TODO def test_youtube_user(self): - DL = FakeDownloader() - IE = YoutubeUserIE(DL) - IE.extract('https://www.youtube.com/user/TheLinuxFoundation') - self.assertTrue(len(DL.result) >= 320) + dl = FakeDownloader() + ie = YoutubeUserIE(dl) + ie.extract('https://www.youtube.com/user/TheLinuxFoundation') + self.assertTrue(len(dl.result) >= 320) if __name__ == '__main__': unittest.main() diff --git a/test/tests.json b/test/tests.json index 5c46af2c8..fd7eb2d65 100644 --- a/test/tests.json +++ b/test/tests.json @@ -286,5 +286,24 @@ "title": "test chars: \"'/\\รค<>This is a test video for youtube-dl.For more information, contact phihag@phihag.de ." } + }, + { + "name": "TED", + "url": "http://www.ted.com/talks/dan_dennett_on_our_consciousness.html", + "file": "102.mp4", + "md5": "7bc087e71d16f18f9b8ab9fa62a8a031", + "info_dict": { + "title": "Dan Dennett: The illusion of consciousness", + "thumbnail": "http://images.ted.com/images/ted/488_389x292.jpg" + } + }, + { + "name": "MySpass", + "url": "http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/", + "file": "11741.mp4", + "md5": "0b49f4844a068f8b33f4b7c88405862b", + "info_dict": { + "title": "Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2" + } } ] diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 10d32a20f..b5a003788 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -305,7 +305,11 @@ class FileDownloader(object): """Report download progress.""" if self.params.get('noprogress', False): return - self.to_screen(u'\r[download] %s of %s at %s ETA %s' % + if self.params.get('progress_with_newline', False): + self.to_screen(u'[download] %s of %s at %s ETA %s' % + (percent_str, data_len_str, speed_str, eta_str)) + else: + self.to_screen(u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str), skip_eol=True) self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip())) @@ -366,12 +370,10 @@ class FileDownloader(object): title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) if matchtitle: - matchtitle = matchtitle.decode('utf8') if not re.search(matchtitle, title, re.IGNORECASE): return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) if rejecttitle: - rejecttitle = rejecttitle.decode('utf8') if re.search(rejecttitle, title, re.IGNORECASE): return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None @@ -550,7 +552,7 @@ class FileDownloader(object): self.to_stderr(u'ERROR: ' + e.msg) if keep_video is False and not self.params.get('keepvideo', False): try: - self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename) + self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) os.remove(encodeFilename(filename)) except (IOError, OSError): self.to_stderr(u'WARNING: Unable to remove downloaded video file') diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index ac69f82fe..a94648dcf 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -15,6 +15,7 @@ import email.utils import xml.etree.ElementTree import random import math +import operator from .utils import * @@ -73,13 +74,15 @@ class InfoExtractor(object): self._ready = False self.set_downloader(downloader) - def suitable(self, url): + @classmethod + def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" - return re.match(self._VALID_URL, url) is not None + return re.match(cls._VALID_URL, url) is not None - def working(self): + @classmethod + def working(cls): """Getter method for _WORKING.""" - return self._WORKING + return cls._WORKING def initialize(self): """Initializes an instance (authentication, etc).""" @@ -136,7 +139,6 @@ class YoutubeIE(InfoExtractor): (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/| tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls - (?!view_play_list|my_playlists|artist|playlist) # ignore playlist URLs (?: # the various things that can precede the ID: (?:(?:v|embed|e)/) # v/ or embed/ or e/ |(?: # or the v= param in all its forms @@ -188,9 +190,11 @@ class YoutubeIE(InfoExtractor): } IE_NAME = u'youtube' - def suitable(self, url): + @classmethod + def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" - return re.match(self._VALID_URL, url, re.VERBOSE) is not None + if YoutubePlaylistIE.suitable(url): return False + return re.match(cls._VALID_URL, url, re.VERBOSE) is not None def report_lang(self): """Report attempt to set language.""" @@ -718,6 +722,7 @@ class DailymotionIE(InfoExtractor): _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' IE_NAME = u'dailymotion' + _WORKING = False def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) @@ -1329,7 +1334,7 @@ class GenericIE(InfoExtractor): opener = compat_urllib_request.OpenerDirector() for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler, HTTPMethodFallback, HEADRedirectHandler, - compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: + compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]: opener.add_handler(handler()) response = opener.open(HeadRequest(url)) @@ -1365,6 +1370,9 @@ class GenericIE(InfoExtractor): if mobj is None: # Broaden the search a little bit mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage) + if mobj is None: + # Broaden the search a little bit: JWPlayer JS loader + mobj = re.search(r'[^A-Za-z0-9]?file:\s*["\'](http[^\'"&]*)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return @@ -1468,12 +1476,16 @@ class YoutubeSearchIE(InfoExtractor): result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) request = compat_urllib_request.Request(result_url) try: - data = compat_urllib_request.urlopen(request).read() + data = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err)) return api_response = json.loads(data)['data'] + if not 'items' in api_response: + self._downloader.trouble(u'[youtube] No video results') + return + new_ids = list(video['id'] for video in api_response['items']) video_ids += new_ids @@ -1658,80 +1670,94 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL|EC)?|PL|EC)([0-9A-Za-z-_]{10,})(?:/.*?/([0-9A-Za-z_-]+))?.*' - _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' - _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&([^&"]+&)*list=.*?%s' - _MORE_PAGES_INDICATOR = u"Next \N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}" + _VALID_URL = r"""(?: + (?:https?://)? + (?:\w+\.)? + youtube\.com/ + (?: + (?:course|view_play_list|my_playlists|artist|playlist|watch) + \? (?:.*?&)*? (?:p|a|list)= + | user/.*?/user/ + | p/ + | user/.*?#[pg]/c/ + ) + ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,}) + .* + | + ((?:PL|EC|UU)[0-9A-Za-z-_]{10,}) + )""" + _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json' + _MAX_RESULTS = 50 IE_NAME = u'youtube:playlist' def __init__(self, downloader=None): InfoExtractor.__init__(self, downloader) + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + def report_download_page(self, playlist_id, pagenum): """Report attempt to download playlist page with given number.""" self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) def _real_extract(self, url): # Extract playlist id - mobj = re.match(self._VALID_URL, url) + mobj = re.match(self._VALID_URL, url, re.VERBOSE) if mobj is None: self._downloader.trouble(u'ERROR: invalid url: %s' % url) return - # Single video case - if mobj.group(3) is not None: - self._downloader.download([mobj.group(3)]) - return - - # Download playlist pages - # prefix is 'p' as default for playlists but there are other types that need extra care - playlist_prefix = mobj.group(1) - if playlist_prefix == 'a': - playlist_access = 'artist' - else: - playlist_prefix = 'p' - playlist_access = 'view_play_list' - playlist_id = mobj.group(2) - video_ids = [] - pagenum = 1 + # Download playlist videos from API + playlist_id = mobj.group(1) or mobj.group(2) + page_num = 1 + videos = [] while True: - self.report_download_page(playlist_id, pagenum) - url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum) - request = compat_urllib_request.Request(url) + self.report_download_page(playlist_id, page_num) + + url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1) try: - page = compat_urllib_request.urlopen(request).read().decode('utf-8') + page = compat_urllib_request.urlopen(url).read().decode('utf8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return - # Extract video identifiers - ids_in_page = [] - for mobj in re.finditer(self._VIDEO_INDICATOR_TEMPLATE % playlist_id, page): - if mobj.group(1) not in ids_in_page: - ids_in_page.append(mobj.group(1)) - video_ids.extend(ids_in_page) + try: + response = json.loads(page) + except ValueError as err: + self._downloader.trouble(u'ERROR: Invalid JSON in API response: ' + compat_str(err)) + return - if self._MORE_PAGES_INDICATOR not in page: + if not 'feed' in response or not 'entry' in response['feed']: + self._downloader.trouble(u'ERROR: Got a malformed response from YouTube API') + return + videos += [ (entry['yt$position']['$t'], entry['content']['src']) + for entry in response['feed']['entry'] + if 'content' in entry ] + + if len(response['feed']['entry']) < self._MAX_RESULTS: break - pagenum = pagenum + 1 + page_num += 1 - total = len(video_ids) + videos = [v[1] for v in sorted(videos)] + total = len(videos) playliststart = self._downloader.params.get('playliststart', 1) - 1 playlistend = self._downloader.params.get('playlistend', -1) if playlistend == -1: - video_ids = video_ids[playliststart:] + videos = videos[playliststart:] else: - video_ids = video_ids[playliststart:playlistend] + videos = videos[playliststart:playlistend] - if len(video_ids) == total: + if len(videos) == total: self._downloader.to_screen(u'[youtube] PL %s: Found %i videos' % (playlist_id, total)) else: - self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(video_ids))) + self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos))) - for id in video_ids: - self._downloader.download(['http://www.youtube.com/watch?v=%s' % id]) + for video in videos: + self._downloader.download([video]) return @@ -1917,9 +1943,8 @@ class BlipTVUserIE(InfoExtractor): while True: self.report_download_page(username, pagenum) - - request = compat_urllib_request.Request( page_base + "&page=" + str(pagenum) ) - + url = page_base + "&page=" + str(pagenum) + request = compat_urllib_request.Request( url ) try: page = compat_urllib_request.urlopen(request).read().decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: @@ -2097,6 +2122,10 @@ class FacebookIE(InfoExtractor): params_raw = compat_urllib_parse.unquote(data['params']) params = json.loads(params_raw) video_url = params['hd_src'] + if not video_url: + video_url = params['sd_src'] + if not video_url: + raise ExtractorError(u'Cannot find video URL') video_duration = int(params['video_duration']) m = re.search('

([^<]+)

', webpage) @@ -2232,7 +2261,7 @@ class MyVideoIE(InfoExtractor): webpage = self._download_webpage(webpage_url, video_id) self.report_extraction(video_id) - mobj = re.search(r'', + mobj = re.search(r'', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') @@ -2291,9 +2320,10 @@ class ComedyCentralIE(InfoExtractor): '400': '384x216', } - def suitable(self, url): + @classmethod + def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" - return re.match(self._VALID_URL, url, re.VERBOSE) is not None + return re.match(cls._VALID_URL, url, re.VERBOSE) is not None def report_extraction(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) @@ -3598,17 +3628,18 @@ class TweetReelIE(InfoExtractor): 'upload_date': upload_date } return [info] - + class SteamIE(InfoExtractor): - _VALID_URL = r"""http://store.steampowered.com/ + _VALID_URL = r"""http://store.steampowered.com/ (?Pvideo|app)/ #If the page is only for videos or for a game (?P\d+)/? (?P\d*)(?P\??) #For urltype == video we sometimes get the videoID """ - def suitable(self, url): + @classmethod + def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" - return re.match(self._VALID_URL, url, re.VERBOSE) is not None + return re.match(cls._VALID_URL, url, re.VERBOSE) is not None def _real_extract(self, url): m = re.match(self._VALID_URL, url, re.VERBOSE) @@ -3619,18 +3650,22 @@ class SteamIE(InfoExtractor): mweb = re.finditer(urlRE, webpage) namesRE = r'(?P.+?)' titles = re.finditer(namesRE, webpage) + thumbsRE = r'' + thumbs = re.finditer(thumbsRE, webpage) videos = [] - for vid,vtitle in zip(mweb,titles): + for vid,vtitle,thumb in zip(mweb,titles,thumbs): video_id = vid.group('videoID') title = vtitle.group('videoName') video_url = vid.group('videoURL') + video_thumb = thumb.group('thumbnail') if not video_url: self._downloader.trouble(u'ERROR: Cannot find video url for %s' % video_id) info = { 'id':video_id, 'url':video_url, 'ext': 'flv', - 'title': unescapeHTML(title) + 'title': unescapeHTML(title), + 'thumbnail': video_thumb } videos.append(info) return videos @@ -3696,7 +3731,7 @@ class RBMARadioIE(InfoExtractor): class YouPornIE(InfoExtractor): """Information extractor for youporn.com.""" _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P[0-9]+)/(?P[^/]+)' - + def _print_formats(self, formats): """Print all available formats""" print(u'Available formats:') @@ -3724,13 +3759,13 @@ class YouPornIE(InfoExtractor): webpage = self._download_webpage(req, video_id) # Get the video title - result = re.search(r'videoTitleArea">(?P<title>.*)</h1>', webpage) + result = re.search(r'<h1.*?>(?P<title>.*)</h1>', webpage) if result is None: - raise ExtractorError(u'ERROR: unable to extract video title') + raise ExtractorError(u'Unable to extract video title') video_title = result.group('title').strip() # Get the video date - result = re.search(r'Date:</b>(?P<date>.*)</li>', webpage) + result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage) if result is None: self._downloader.to_stderr(u'WARNING: unable to extract video date') upload_date = None @@ -3738,9 +3773,9 @@ class YouPornIE(InfoExtractor): upload_date = result.group('date').strip() # Get the video uploader - result = re.search(r'Submitted:</b>(?P<uploader>.*)</li>', webpage) + result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage) if result is None: - self._downloader.to_stderr(u'ERROR: unable to extract uploader') + self._downloader.to_stderr(u'WARNING: unable to extract uploader') video_uploader = None else: video_uploader = result.group('uploader').strip() @@ -3758,8 +3793,8 @@ class YouPornIE(InfoExtractor): links = re.findall(LINK_RE, download_list_html) if(len(links) == 0): raise ExtractorError(u'ERROR: no known formats available for video') - - self._downloader.to_screen(u'[youporn] Links found: %d' % len(links)) + + self._downloader.to_screen(u'[youporn] Links found: %d' % len(links)) formats = [] for link in links: @@ -3810,7 +3845,7 @@ class YouPornIE(InfoExtractor): return return [format] - + class PornotubeIE(InfoExtractor): """Information extractor for pornotube.com.""" @@ -3882,7 +3917,7 @@ class YouJizzIE(InfoExtractor): embed_page_url = result.group(0).strip() video_id = result.group('videoid') - + webpage = self._download_webpage(embed_page_url, video_id) # Get the video URL @@ -3912,7 +3947,7 @@ class EightTracksIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - m = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL) + m = re.search(r"PAGE.mix = (.*?);\n", webpage, flags=re.DOTALL) if not m: raise ExtractorError(u'Cannot find trax information') json_like = m.group(1) @@ -3968,6 +4003,135 @@ class KeekIE(InfoExtractor): } return [info] +class TEDIE(InfoExtractor): + _VALID_URL=r'''http://www.ted.com/ + ( + ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist + | + ((?P<type_talk>talks)) # We have a simple talk + ) + /(?P<name>\w+) # Here goes the name and then ".html" + ''' + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + + def _real_extract(self, url): + m=re.match(self._VALID_URL, url, re.VERBOSE) + if m.group('type_talk'): + return [self._talk_info(url)] + else : + playlist_id=m.group('playlist_id') + name=m.group('name') + self._downloader.to_screen(u'[%s] Getting info of playlist %s: "%s"' % (self.IE_NAME,playlist_id,name)) + return self._playlist_videos_info(url,name,playlist_id) + + def _talk_video_link(self,mediaSlug): + '''Returns the video link for that mediaSlug''' + return 'http://download.ted.com/talks/%s.mp4' % mediaSlug + + def _playlist_videos_info(self,url,name,playlist_id=0): + '''Returns the videos of the playlist''' + video_RE=r''' + <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)" + ([.\s]*?)data-playlist_item_id="(\d+)" + ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)" + ''' + video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>' + webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage') + m_videos=re.finditer(video_RE,webpage,re.VERBOSE) + m_names=re.finditer(video_name_RE,webpage) + info=[] + for m_video, m_name in zip(m_videos,m_names): + video_id=m_video.group('video_id') + talk_url='http://www.ted.com%s' % m_name.group('talk_url') + info.append(self._talk_info(talk_url,video_id)) + return info + + def _talk_info(self, url, video_id=0): + """Return the video for the talk in the url""" + m=re.match(self._VALID_URL, url,re.VERBOSE) + videoName=m.group('name') + webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName) + # If the url includes the language we get the title translated + title_RE=r'<h1><span id="altHeadline" >(?P<title>.*)</span></h1>' + title=re.search(title_RE, webpage).group('title') + info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?) + "id":(?P<videoID>[\d]+).*? + "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"''' + thumb_RE=r'</span>[\s.]*</div>[\s.]*<img src="(?P<thumbnail>.*?)"' + thumb_match=re.search(thumb_RE,webpage) + info_match=re.search(info_RE,webpage,re.VERBOSE) + video_id=info_match.group('videoID') + mediaSlug=info_match.group('mediaSlug') + video_url=self._talk_video_link(mediaSlug) + info = { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'thumbnail': thumb_match.group('thumbnail') + } + return info + +class MySpassIE(InfoExtractor): + _VALID_URL = r'http://www.myspass.de/.*' + + def _real_extract(self, url): + META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s' + + # video id is the last path element of the URL + # usually there is a trailing slash, so also try the second but last + url_path = compat_urllib_parse_urlparse(url).path + url_parent_path, video_id = os.path.split(url_path) + if not video_id: + _, video_id = os.path.split(url_parent_path) + + # get metadata + metadata_url = META_DATA_URL_TEMPLATE % video_id + metadata_text = self._download_webpage(metadata_url, video_id) + metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8')) + + # extract values from metadata + url_flv_el = metadata.find('url_flv') + if url_flv_el is None: + self._downloader.trouble(u'ERROR: unable to extract download url') + return + video_url = url_flv_el.text + extension = os.path.splitext(video_url)[1][1:] + title_el = metadata.find('title') + if title_el is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + title = title_el.text + format_id_el = metadata.find('format_id') + if format_id_el is None: + format = ext + else: + format = format_id_el.text + description_el = metadata.find('description') + if description_el is not None: + description = description_el.text + else: + description = None + imagePreview_el = metadata.find('imagePreview') + if imagePreview_el is not None: + thumbnail = imagePreview_el.text + else: + thumbnail = None + info = { + 'id': video_id, + 'url': video_url, + 'title': title, + 'ext': extension, + 'format': format, + 'thumbnail': thumbnail, + 'description': description + } + return [info] + def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. @@ -4015,6 +4179,8 @@ def gen_extractors(): RBMARadioIE(), EightTracksIE(), KeekIE(), + TEDIE(), + MySpassIE(), GenericIE() ] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1a4247786..ada101a18 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -202,6 +202,8 @@ def parseOpts(): verbosity.add_option('--get-format', action='store_true', dest='getformat', help='simulate, quiet but print output format', default=False) + verbosity.add_option('--newline', + action='store_true', dest='progress_with_newline', help='output progress bar as new lines', default=False) verbosity.add_option('--no-progress', action='store_true', dest='noprogress', help='do not print progress bar', default=False) verbosity.add_option('--console-title', @@ -210,7 +212,6 @@ def parseOpts(): verbosity.add_option('-v', '--verbose', action='store_true', dest='verbose', help='print various debugging information', default=False) - filesystem.add_option('-t', '--title', action='store_true', dest='usetitle', help='use title in file name', default=False) filesystem.add_option('--id', @@ -415,6 +416,7 @@ def _real_main(): or (opts.useid and u'%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s') + # File downloader fd = FileDownloader({ 'usenetrc': opts.usenetrc, @@ -442,6 +444,7 @@ def _real_main(): 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, + 'progress_with_newline': opts.progress_with_newline, 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'logtostderr': opts.outtmpl == '-', @@ -452,8 +455,8 @@ def _real_main(): 'writeinfojson': opts.writeinfojson, 'writesubtitles': opts.writesubtitles, 'subtitleslang': opts.subtitleslang, - 'matchtitle': opts.matchtitle, - 'rejecttitle': opts.rejecttitle, + 'matchtitle': decodeOption(opts.matchtitle), + 'rejecttitle': decodeOption(opts.rejecttitle), 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'verbose': opts.verbose, diff --git a/youtube_dl/update.py b/youtube_dl/update.py index f6e3e5c69..b446dd94c 100644 --- a/youtube_dl/update.py +++ b/youtube_dl/update.py @@ -77,10 +77,8 @@ def update_self(to_screen, verbose, filename): to_screen(u'Updating to version ' + versions_info['latest'] + '...') version = versions_info['versions'][versions_info['latest']] - if version.get('notes'): - to_screen(u'PLEASE NOTE:') - for note in version['notes']: - to_screen(note) + + print_notes(versions_info['versions']) if not os.access(filename, os.W_OK): to_screen(u'ERROR: no write permissions on %s' % filename) @@ -158,3 +156,13 @@ del "%s" return to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.') + +def print_notes(versions, fromVersion=__version__): + notes = [] + for v,vdata in sorted(versions.items()): + if v > fromVersion: + notes.extend(vdata.get('notes', [])) + if notes: + to_screen(u'PLEASE NOTE:') + for note in notes: + to_screen(note) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e6ce028d6..95bd94843 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -420,6 +420,14 @@ def encodeFilename(s): encoding = 'utf-8' return s.encode(encoding, 'ignore') +def decodeOption(optval): + if optval is None: + return optval + if isinstance(optval, bytes): + optval = optval.decode(preferredencoding()) + + assert isinstance(optval, compat_str) + return optval class ExtractorError(Exception): """Error during info extraction.""" diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8b231ae80..ce8f6ca23 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.02.02' +__version__ = '2013.02.25'