diff --git a/AUTHORS b/AUTHORS index 48769320a..9c65dc1d4 100644 --- a/AUTHORS +++ b/AUTHORS @@ -120,3 +120,5 @@ Jeff Buchbinder Amish Bhadeshia Joram Schrijver Will W. +Mohammad Teimori Pabandi +Roman Le Négrate diff --git a/README.md b/README.md index 4f9fc8174..caa1478d9 100644 --- a/README.md +++ b/README.md @@ -45,21 +45,21 @@ which means you can modify it, redistribute it or use it however you like. youtube-dl [OPTIONS] URL [URL...] # OPTIONS - -h, --help print this help text and exit - --version print program version and exit - -U, --update update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) - -i, --ignore-errors continue on download errors, for example to skip unavailable videos in a playlist + -h, --help Print this help text and exit + --version Print program version and exit + -U, --update Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed) + -i, --ignore-errors Continue on download errors, for example to skip unavailable videos in a playlist --abort-on-error Abort downloading of further videos (in the playlist or the command line) if an error occurs - --dump-user-agent display the current browser identification + --dump-user-agent Display the current browser identification --list-extractors List all supported extractors and the URLs they would handle --extractor-descriptions Output descriptions of all supported extractors - --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". + --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching. --ignore-config Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: Do not read the user configuration in ~/.config/youtube-dl/config (%APPDATA%/youtube-dl/config.txt on Windows) --flat-playlist Do not extract the videos of a playlist, only list them. - --no-color Do not emit color codes in output. + --no-color Do not emit color codes in output ## Network Options: --proxy URL Use the specified HTTP/HTTPS proxy. Pass in an empty string (--proxy "") for direct connection @@ -71,70 +71,70 @@ which means you can modify it, redistribute it or use it however you like. not present) is used for the actual downloading. (experimental) ## Video Selection: - --playlist-start NUMBER playlist video to start at (default is 1) - --playlist-end NUMBER playlist video to end at (default is last) - --playlist-items ITEM_SPEC playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" + --playlist-start NUMBER Playlist video to start at (default is 1) + --playlist-end NUMBER Playlist video to end at (default is last) + --playlist-items ITEM_SPEC Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13. - --match-title REGEX download only matching titles (regex or caseless sub-string) - --reject-title REGEX skip download for matching titles (regex or caseless sub-string) + --match-title REGEX Download only matching titles (regex or caseless sub-string) + --reject-title REGEX Skip download for matching titles (regex or caseless sub-string) --max-downloads NUMBER Abort after downloading NUMBER files --min-filesize SIZE Do not download any videos smaller than SIZE (e.g. 50k or 44.6m) --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) - --date DATE download only videos uploaded in this date - --datebefore DATE download only videos uploaded on or before this date (i.e. inclusive) - --dateafter DATE download only videos uploaded on or after this date (i.e. inclusive) + --date DATE Download only videos uploaded in this date + --datebefore DATE Download only videos uploaded on or before this date (i.e. inclusive) + --dateafter DATE Download only videos uploaded on or after this date (i.e. inclusive) --min-views COUNT Do not download any videos with less than COUNT views --max-views COUNT Do not download any videos with more than COUNT views - --match-filter FILTER (Experimental) Generic video filter. Specify any key (see help for -o for a list of available keys) to match if the key is present, + --match-filter FILTER Generic video filter (experimental). Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present,key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator.For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count 10M]"). This works for filesize, height, width, tbr, abr, vbr, asr, and fps and the comparisons <, <=, >, >=, =, != and for ext, acodec, @@ -194,44 +194,44 @@ which means you can modify it, redistribute it or use it however you like. Use commas to download multiple audio formats, such as -f 136/137/mp4/bestvideo,140/m4a/bestaudio. You can merge the video and audio of two formats into a single file using -f + (requires ffmpeg or avconv), for example -f bestvideo+bestaudio. - --all-formats download all available video formats - --prefer-free-formats prefer free video formats unless a specific one is requested - --max-quality FORMAT highest quality format to download - -F, --list-formats list all available formats + --all-formats Download all available video formats + --prefer-free-formats Prefer free video formats unless a specific one is requested + --max-quality FORMAT Highest quality format to download + -F, --list-formats List all available formats --youtube-skip-dash-manifest Do not download the DASH manifest on YouTube videos --merge-output-format FORMAT If a merge is required (e.g. bestvideo+bestaudio), output to given container format. One of mkv, mp4, ogg, webm, flv.Ignored if no merge is required ## Subtitle Options: - --write-sub write subtitle file - --write-auto-sub write automatic subtitle file (youtube only) - --all-subs downloads all the available subtitles of the video - --list-subs lists all available subtitles for the video - --sub-format FORMAT subtitle format, accepts formats preference, for example: "ass/srt/best" - --sub-lang LANGS languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' + --write-sub Write subtitle file + --write-auto-sub Write automatic subtitle file (YouTube only) + --all-subs Download all the available subtitles of the video + --list-subs List all available subtitles for the video + --sub-format FORMAT Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best" + --sub-lang LANGS Languages of the subtitles to download (optional) separated by commas, use IETF language tags like 'en,pt' ## Authentication Options: - -u, --username USERNAME login with this account ID - -p, --password PASSWORD account password. If this option is left out, youtube-dl will ask interactively. - -2, --twofactor TWOFACTOR two-factor auth code - -n, --netrc use .netrc authentication data - --video-password PASSWORD video password (vimeo, smotri) + -u, --username USERNAME Login with this account ID + -p, --password PASSWORD Account password. If this option is left out, youtube-dl will ask interactively. + -2, --twofactor TWOFACTOR Two-factor auth code + -n, --netrc Use .netrc authentication data + --video-password PASSWORD Video password (vimeo, smotri) ## Post-processing Options: - -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) - --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default - --audio-quality QUALITY ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K - (default 5) + -x, --extract-audio Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) + --audio-format FORMAT Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "best" by default + --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default + 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv) - -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default - --no-post-overwrites do not overwrite post-processed files; the post-processed files are overwritten by default - --embed-subs embed subtitles in the video (only for mp4 videos) - --embed-thumbnail embed thumbnail in the audio as cover art - --add-metadata write metadata to the video file - --metadata-from-title FORMAT parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed + -k, --keep-video Keep the video file on disk after the post-processing; the video is erased by default + --no-post-overwrites Do not overwrite post-processed files; the post-processed files are overwritten by default + --embed-subs Embed subtitles in the video (only for mp4 videos) + --embed-thumbnail Embed thumbnail in the audio as cover art + --add-metadata Write metadata to the video file + --metadata-from-title FORMAT Parse additional metadata like song title / artist from the video title. The format syntax is the same as --output, the parsed parameters replace existing values. Additional templates: %(album), %(artist). Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise" - --xattrs write metadata to the video file's xattrs (using dublin core and xdg standards) + --xattrs Write metadata to the video file's xattrs (using dublin core and xdg standards) --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn(the default; fix file if we can, warn otherwise) --prefer-avconv Prefer avconv over ffmpeg for running the postprocessors (default) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 2785b9587..c85a39918 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -166,6 +166,7 @@ - **Gamekings** - **GameOne** - **gameone:playlist** + - **Gamersyde** - **GameSpot** - **GameStar** - **Gametrailers** @@ -351,6 +352,7 @@ - **PornHub** - **PornHubPlaylist** - **Pornotube** + - **PornoVoisines** - **PornoXO** - **PrimeShareTV** - **PromptFile** @@ -362,6 +364,7 @@ - **radio.de** - **radiobremen** - **radiofrance** + - **RadioJavan** - **Rai** - **RBMARadio** - **RedTube** @@ -422,6 +425,7 @@ - **southpark.cc.com** - **southpark.de** - **Space** + - **SpankBang** - **Spankwire** - **Spiegel** - **Spiegel:Article**: Articles on spiegel.de @@ -501,6 +505,7 @@ - **Ubu** - **udemy** - **udemy:course** + - **UDNEmbed** - **Ultimedia** - **Unistra** - **Urort**: NRK P3 Urørt diff --git a/test/test_execution.py b/test/test_execution.py index f31e51558..620db080e 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -8,6 +8,9 @@ import unittest import sys import os import subprocess +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import encodeArgument rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -31,7 +34,7 @@ class TestExecution(unittest.TestCase): def test_cmdline_umlauts(self): p = subprocess.Popen( - [sys.executable, 'youtube_dl/__main__.py', 'ä', '--version'], + [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) diff --git a/test/test_utils.py b/test/test_utils.py index abaf1ab73..2e3a6480c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -227,6 +227,7 @@ class TestUtil(unittest.TestCase): self.assertEqual( unified_strdate('2/2/2015 6:47:40 PM', day_first=False), '20150202') + self.assertEqual(unified_strdate('25-09-2014'), '20140925') def test_find_xpath_attr(self): testxml = ''' @@ -470,6 +471,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(d['x'], 1) self.assertEqual(d['y'], 'a') + on = js_to_json('["abc", "def",]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('{"abc": "def",}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ce4b72fd3..640b8c99d 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1768,14 +1768,6 @@ class YoutubeDL(object): debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) - # The ssl context is only available in python 2.7.9 and 3.x - if hasattr(https_handler, '_context'): - ctx = https_handler._context - # get_ca_certs is unavailable prior to python 3.4 - if hasattr(ctx, 'get_ca_certs') and len(ctx.get_ca_certs()) == 0: - self.report_warning( - 'No ssl certificates were loaded, urls that use https ' - 'won\'t work') ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) opener = compat_urllib_request.build_opener( proxy_handler, https_handler, cookie_processor, ydlh) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 852b2fc3d..1c8b411b7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -189,10 +189,6 @@ def _real_main(argv=None): if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True - if sys.version_info < (3,): - # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) - if opts.outtmpl is not None: - opts.outtmpl = opts.outtmpl.decode(preferredencoding()) outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index aae4aae4c..bbf3be41d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -90,6 +90,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .condenast import CondeNastIE from .cracked import CrackedIE from .criterion import CriterionIE +from .crooksandliars import CrooksAndLiarsIE from .crunchyroll import ( CrunchyrollIE, CrunchyrollShowPlaylistIE @@ -177,6 +178,7 @@ from .gameone import ( GameOneIE, GameOnePlaylistIE, ) +from .gamersyde import GamersydeIE from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gametrailers import GametrailersIE @@ -387,6 +389,7 @@ from .pornhub import ( PornHubPlaylistIE, ) from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE @@ -396,6 +399,7 @@ from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .r7 import R7IE from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE @@ -468,6 +472,7 @@ from .southpark import ( SouthparkDeIE, ) from .space import SpaceIE +from .spankbang import SpankBangIE from .spankwire import SpankwireIE from .spiegel import SpiegelIE, SpiegelArticleIE from .spiegeltv import SpiegeltvIE @@ -553,6 +558,7 @@ from .udemy import ( UdemyIE, UdemyCourseIE ) +from .udn import UDNEmbedIE from .ultimedia import UltimediaIE from .unistra import UnistraIE from .urort import UrortIE diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 203936e54..e3e6d2113 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -11,12 +11,13 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + qualities, ) class AddAnimeIE(InfoExtractor): - _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P[\w_]+)(?:.*)' - _TEST = { + _VALID_URL = r'http://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P[\w_]+)' + _TESTS = [{ 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', 'md5': '72954ea10bc979ab5e2eb288b21425a0', 'info_dict': { @@ -25,7 +26,10 @@ class AddAnimeIE(InfoExtractor): 'description': 'One Piece 606', 'title': 'One Piece 606', } - } + }, { + 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -63,8 +67,10 @@ class AddAnimeIE(InfoExtractor): note='Confirming after redirect') webpage = self._download_webpage(url, video_id) + FORMATS = ('normal', 'hq') + quality = qualities(FORMATS) formats = [] - for format_id in ('normal', 'hq'): + for format_id in FORMATS: rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) video_url = self._search_regex(rex, webpage, 'video file URLx', fatal=False) @@ -73,6 +79,7 @@ class AddAnimeIE(InfoExtractor): formats.append({ 'format_id': format_id, 'url': video_url, + 'quality': quality(format_id), }) self._sort_formats(formats) video_title = self._og_search_title(webpage) diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index 8442019ea..a117502bc 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -2,10 +2,11 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class AftonbladetIE(InfoExtractor): - _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?Particle[0-9]+)\.ab(?:$|[?#])' + _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?Particle[0-9]+)\.ab(?:$|[?#])' _TEST = { 'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', 'info_dict': { @@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor): formats.append({ 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), 'ext': 'mp4', - 'width': fmt['width'], - 'height': fmt['height'], - 'tbr': fmt['bitrate'], + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + 'tbr': int_or_none(fmt.get('bitrate')), 'protocol': 'http', }) self._sort_formats(formats) @@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor): 'id': video_id, 'title': internal_meta_json['title'], 'formats': formats, - 'thumbnail': internal_meta_json['imageUrl'], - 'description': internal_meta_json['shortPreamble'], - 'timestamp': internal_meta_json['timePublished'], - 'duration': internal_meta_json['duration'], - 'view_count': internal_meta_json['views'], + 'thumbnail': internal_meta_json.get('imageUrl'), + 'description': internal_meta_json.get('shortPreamble'), + 'timestamp': int_or_none(internal_meta_json.get('timePublished')), + 'duration': int_or_none(internal_meta_json.get('duration')), + 'view_count': int_or_none(internal_meta_json.get('views')), } diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 8c7ba4b91..b632ce967 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -172,6 +172,7 @@ class BlipTVIE(InfoExtractor): 'width': int_or_none(media_content.get('width')), 'height': int_or_none(media_content.get('height')), }) + self._check_formats(formats, video_id) self._sort_formats(formats) subtitles = self.extract_subtitles(video_id, subtitles_urls) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 4a88ccd13..0dca29b71 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -6,32 +6,39 @@ from .common import InfoExtractor class BloombergIE(InfoExtractor): - _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?)\.html' + _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P[^/?#]+)' _TEST = { - 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', # The md5 checksum changes 'info_dict': { 'id': 'qurhIVlJSB6hzkVi229d8g', 'ext': 'flv', 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', - 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88', + 'description': 'md5:a8ba0302912d03d246979735c17d2761', }, } def _real_extract(self, url): name = self._match_id(url) webpage = self._download_webpage(url, name) - - f4m_url = self._search_regex( - r'.+?/(?P[^/]+?)(?:\.(?:cnn|hln|ktvk)(?:-ap)?|(?=&)))''' + (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z]{3,5})(?:-ap)?|(?=&)))''' _TESTS = [{ 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', @@ -48,6 +48,9 @@ class CNNIE(InfoExtractor): }, { 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', 'only_matching': True, + }, { + 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e5245ec3f..530c449c1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -822,7 +822,7 @@ class InfoExtractor(object): (media_el.attrib.get('href') or media_el.attrib.get('url'))) tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ - 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), + 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), 'url': manifest_url, 'ext': 'flv', 'tbr': tbr, diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py new file mode 100644 index 000000000..443eb7691 --- /dev/null +++ b/youtube_dl/extractor/crooksandliars.py @@ -0,0 +1,60 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + qualities, +) + + +class CrooksAndLiarsIE(InfoExtractor): + _VALID_URL = r'https?://embed\.crooksandliars\.com/(?:embed|v)/(?P<id>[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', + 'info_dict': { + 'id': '8RUoRhRi', + 'ext': 'mp4', + 'title': 'Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!', + 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', + 'thumbnail': 're:^https?://.*\.jpg', + 'timestamp': 1428207000, + 'upload_date': '20150405', + 'uploader': 'Heather', + 'duration': 236, + } + }, { + 'url': 'http://embed.crooksandliars.com/v/MTE3MjUtMzQ2MzA', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://embed.crooksandliars.com/embed/%s' % video_id, video_id) + + manifest = self._parse_json( + self._search_regex( + r'var\s+manifest\s*=\s*({.+?})\n', webpage, 'manifest JSON'), + video_id) + + quality = qualities(('webm_low', 'mp4_low', 'webm_high', 'mp4_high')) + + formats = [{ + 'url': item['url'], + 'format_id': item['type'], + 'quality': quality(item['type']), + } for item in manifest['flavors'] if item['mime'].startswith('video/')] + self._sort_formats(formats) + + return { + 'url': url, + 'id': video_id, + 'title': manifest['title'], + 'description': manifest.get('description'), + 'thumbnail': self._proto_relative_url(manifest.get('poster')), + 'timestamp': int_or_none(manifest.get('created')), + 'uploader': manifest.get('author'), + 'duration': int_or_none(manifest.get('duration')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 47d58330b..7615ecd4b 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -224,7 +224,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): class DailymotionUserIE(DailymotionPlaylistIE): IE_NAME = 'dailymotion:user' - _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P<user>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?:old/)?user/(?P<user>[^/]+)' _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s' _TESTS = [{ 'url': 'https://www.dailymotion.com/user/nqtv', diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 69ca75423..05bb22ddf 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -3,22 +3,25 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + ExtractorError, + unified_strdate, +) class DreiSatIE(InfoExtractor): IE_NAME = '3sat' _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TEST = { - 'url': 'http://www.3sat.de/mediathek/index.php?obj=36983', - 'md5': '9dcfe344732808dbfcc901537973c922', + 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', + 'md5': 'be37228896d30a88f315b638900a026e', 'info_dict': { - 'id': '36983', + 'id': '45918', 'ext': 'mp4', - 'title': 'Kaffeeland Schweiz', - 'description': 'md5:cc4424b18b75ae9948b13929a0814033', + 'title': 'Waidmannsheil', + 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 'uploader': '3sat', - 'upload_date': '20130622' + 'upload_date': '20140913' } } @@ -28,6 +31,15 @@ class DreiSatIE(InfoExtractor): details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id details_doc = self._download_xml(details_url, video_id, 'Downloading video details') + status_code = details_doc.find('./status/statuscode') + if status_code is not None and status_code.text != 'ok': + code = status_code.text + if code == 'notVisibleAnymore': + message = 'Video %s is not available' % video_id + else: + message = '%s returned error: %s' % (self.IE_NAME, code) + raise ExtractorError(message, expected=True) + thumbnail_els = details_doc.findall('.//teaserimage') thumbnails = [{ 'width': int(te.attrib['key'].partition('x')[0]), diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 8257e35a4..f25ab319e 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor, ExtractorError @@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' _TEST = { - 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', - 'md5': '4a7e1dd65cdb2643500a3f753c942f25', + 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', + 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', 'info_dict': { - 'id': 'partiets-mand-7-8', + 'id': 'panisk-paske-5', 'ext': 'mp4', - 'title': 'Partiets mand (7:8)', - 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862', - 'timestamp': 1403047940, - 'upload_date': '20140617', - 'duration': 1299.040, + 'title': 'Panisk Påske (5)', + 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', + 'timestamp': 1426984612, + 'upload_date': '20150322', + 'duration': 1455, }, } @@ -26,6 +27,10 @@ class DRTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + if '>Programmet er ikke længere tilgængeligt' in webpage: + raise ExtractorError( + 'Video %s is not available' % video_id, expected=True) + video_id = self._search_regex( r'data-(?:material-identifier|episode-slug)="([^"]+)"', webpage, 'video id') diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py index 6b651778a..ff78d4fd2 100644 --- a/youtube_dl/extractor/dump.py +++ b/youtube_dl/extractor/dump.py @@ -28,12 +28,12 @@ class DumpIE(InfoExtractor): video_url = self._search_regex( r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL') - thumb = self._og_search_thumbnail(webpage) - title = self._search_regex(r'<b>([^"]+)</b>', webpage, 'title') + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) return { 'id': video_id, 'title': title, 'url': video_url, - 'thumbnail': thumb, + 'thumbnail': thumbnail, } diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py index e43bc81b2..9c594b757 100644 --- a/youtube_dl/extractor/dumpert.py +++ b/youtube_dl/extractor/dumpert.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import base64 from .common import InfoExtractor +from ..compat import compat_urllib_request from ..utils import qualities @@ -23,7 +24,10 @@ class DumpertIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + + req = compat_urllib_request.Request(url) + req.add_header('Cookie', 'nsfw=1') + webpage = self._download_webpage(req, video_id) files_base64 = self._search_regex( r'data-files="([^"]+)"', webpage, 'data files') diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 7173371ee..688dfc2f7 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -45,6 +45,7 @@ class EaglePlatformIE(InfoExtractor): 'duration': 216, 'view_count': int, }, + 'skip': 'Georestricted', }] def _handle_error(self, response): diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index fc92ff825..5154bbd7f 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -13,15 +13,15 @@ from ..utils import ( class EllenTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?(?:ellentv|ellentube)\.com/videos/(?P<id>[a-z0-9_-]+)' _TESTS = [{ - 'url': 'http://www.ellentv.com/videos/0-7jqrsr18/', - 'md5': 'e4af06f3bf0d5f471921a18db5764642', + 'url': 'http://www.ellentv.com/videos/0-ipq1gsai/', + 'md5': '8e3c576bf2e9bfff4d76565f56f94c9c', 'info_dict': { - 'id': '0-7jqrsr18', + 'id': '0-ipq1gsai', 'ext': 'mp4', - 'title': 'What\'s Wrong with These Photos? A Whole Lot', - 'description': 'md5:35f152dc66b587cf13e6d2cf4fa467f6', - 'timestamp': 1406876400, - 'upload_date': '20140801', + 'title': 'Fast Fingers of Fate', + 'description': 'md5:686114ced0a032926935e9015ee794ac', + 'timestamp': 1428033600, + 'upload_date': '20150403', } }, { 'url': 'http://ellentube.com/videos/0-dvzmabd5/', @@ -40,14 +40,15 @@ class EllenTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._html_search_meta('VideoURL', webpage, 'url') + + video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True) title = self._og_search_title(webpage, default=None) or self._search_regex( r'pageName\s*=\s*"([^"]+)"', webpage, 'title') description = self._html_search_meta( 'description', webpage, 'description') or self._og_search_description(webpage) timestamp = parse_iso8601(self._search_regex( r'<span class="publish-date"><time datetime="([^"]+)">', - webpage, 'timestamp')) + webpage, 'timestamp', fatal=False)) return { 'id': video_id, diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 170d68075..edf555b29 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -14,7 +14,9 @@ from ..utils import ( clean_html, ExtractorError, int_or_none, + float_or_none, parse_duration, + determine_ext, ) @@ -50,7 +52,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor): if not video_url: continue format_id = video['format'] - if video_url.endswith('.f4m'): + ext = determine_ext(video_url) + if ext == 'f4m': if georestricted: # See https://github.com/rg3/youtube-dl/issues/3963 # m3u8 urls work fine @@ -60,12 +63,9 @@ class FranceTVBaseInfoExtractor(InfoExtractor): 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path, video_id, 'Downloading f4m manifest token', fatal=False) if f4m_url: - f4m_formats = self._extract_f4m_formats(f4m_url, video_id) - for f4m_format in f4m_formats: - f4m_format['preference'] = 1 - formats.extend(f4m_formats) - elif video_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) + formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, @@ -86,7 +86,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor): 'title': info['titre'], 'description': clean_html(info['synopsis']), 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), - 'duration': parse_duration(info['duree']), + 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']), 'timestamp': int_or_none(info['diffusion']['timestamp']), 'formats': formats, } @@ -260,22 +260,28 @@ class CultureboxIE(FranceTVBaseInfoExtractor): _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' _TEST = { - 'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553', - 'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6', + 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511', + 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6', 'info_dict': { - 'id': 'EV_22853', + 'id': 'EV_50111', 'ext': 'flv', - 'title': 'Dans les jardins de William Christie - Le Camus', - 'description': 'md5:4710c82315c40f0c865ca8b9a68b5299', - 'upload_date': '20140829', - 'timestamp': 1409317200, + 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne", + 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9', + 'upload_date': '20150320', + 'timestamp': 1426892400, + 'duration': 2760.9, }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') + webpage = self._download_webpage(url, name) + + if ">Ce live n'est plus disponible en replay<" in webpage: + raise ExtractorError('Video %s is not available' % name, expected=True) + video_id, catalogue = self._search_regex( r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py new file mode 100644 index 000000000..d545e01bb --- /dev/null +++ b/youtube_dl/extractor/gamersyde.py @@ -0,0 +1,70 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + parse_duration, + remove_start, +) + + +class GamersydeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html' + _TEST = { + 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', + 'md5': 'f38d400d32f19724570040d5ce3a505f', + 'info_dict': { + 'id': '34371', + 'ext': 'mp4', + 'duration': 372, + 'title': 'Bloodborne - Birth of a hero', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) + + playlist = self._parse_json( + self._search_regex( + r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'), + display_id, transform_source=js_to_json) + + formats = [] + for source in playlist['sources']: + video_url = source.get('file') + if not video_url: + continue + format_id = source.get('label') + f = { + 'url': video_url, + 'format_id': format_id, + } + m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id) + if m: + f.update({ + 'height': int(m.group('height')), + 'fps': int(m.group('fps')), + }) + formats.append(f) + self._sort_formats(formats) + + title = remove_start(playlist['title'], '%s - ' % video_id) + thumbnail = playlist.get('image') + duration = parse_duration(self._search_regex( + r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False)) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 2ff002643..7ad555e9f 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -34,6 +34,7 @@ from .ooyala import OoyalaIE from .rutv import RUTVIE from .smotri import SmotriIE from .condenast import CondeNastIE +from .udn import UDNEmbedIE class GenericIE(InfoExtractor): @@ -641,6 +642,32 @@ class GenericIE(InfoExtractor): 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, + # Crooks and Liars embed + { + 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', + 'info_dict': { + 'id': '8RUoRhRi', + 'ext': 'mp4', + 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", + 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f', + 'timestamp': 1428207000, + 'upload_date': '20150405', + 'uploader': 'Heather', + }, + }, + # Crooks and Liars external embed + { + 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/', + 'info_dict': { + 'id': 'MTE3MjUtMzQ2MzA', + 'ext': 'mp4', + 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5', + 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec', + 'timestamp': 1265032391, + 'upload_date': '20100201', + 'uploader': 'Heather', + }, + }, # NBC Sports vplayer embed { 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a', @@ -650,6 +677,17 @@ class GenericIE(InfoExtractor): 'title': "PFT Live: New leader in the 'new-look' defense", 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', }, + }, + # UDN embed + { + 'url': 'http://www.udn.com/news/story/7314/822787', + 'md5': 'de06b4c90b042c128395a88f0384817e', + 'info_dict': { + 'id': '300040', + 'ext': 'mp4', + 'title': '生物老師男變女 全校挺"做自己"', + 'thumbnail': 're:^https?://.*\.jpg$', + } } ] @@ -1263,11 +1301,24 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin') + # Look for Crooks and Liars embeds + mobj = re.search( + r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for NBC Sports VPlayer embeds nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage) if nbc_sports_url: return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') + # Look for UDN embeds + mobj = re.search( + r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage) + if mobj is not None: + return self.url_result( + compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dl/extractor/hitbox.py index 84bd7c080..d606429ca 100644 --- a/youtube_dl/extractor/hitbox.py +++ b/youtube_dl/extractor/hitbox.py @@ -10,6 +10,7 @@ from ..utils import ( float_or_none, int_or_none, compat_str, + determine_ext, ) @@ -147,12 +148,27 @@ class HitboxLiveIE(HitboxIE): servers.append(base_url) for stream in cdn.get('bitrates'): label = stream.get('label') - if label != 'Auto': + if label == 'Auto': + continue + stream_url = stream.get('url') + if not stream_url: + continue + bitrate = int_or_none(stream.get('bitrate')) + if stream.get('provider') == 'hls' or determine_ext(stream_url) == 'm3u8': + if not stream_url.startswith('http'): + continue formats.append({ - 'url': '%s/%s' % (base_url, stream.get('url')), + 'url': stream_url, 'ext': 'mp4', - 'vbr': stream.get('bitrate'), - 'resolution': label, + 'tbr': bitrate, + 'format_note': label, + 'rtmp_live': True, + }) + else: + formats.append({ + 'url': '%s/%s' % (base_url, stream_url), + 'ext': 'mp4', + 'tbr': bitrate, 'rtmp_live': True, 'format_note': host, 'page_url': url, diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 2467f8bdd..ec309dadd 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -21,7 +21,7 @@ from ..utils import ( class LivestreamIE(InfoExtractor): IE_NAME = 'livestream' - _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])' + _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])' _TESTS = [{ 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', 'md5': '53274c76ba7754fb0e8d072716f2292b', @@ -51,6 +51,9 @@ class LivestreamIE(InfoExtractor): }, { 'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640', 'only_matching': True, + }, { + 'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015', + 'only_matching': True, }] def _parse_smil(self, video_id, smil_url): diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 11608f730..cc3f27194 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -44,7 +44,7 @@ class MioMioIE(InfoExtractor): xml_config = self._search_regex( r'flashvars="type=sina&(.+?)&', webpage, 'xml config') - + # skipping the following page causes lags and eventually connection drop-outs self._request_webpage( 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 21aea0c55..84f291558 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor): r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) like_count = str_to_int(self._search_regex( - r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"', + r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"', webpage, 'like count', fatal=False)) view_count = str_to_int(self._search_regex( [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py new file mode 100644 index 000000000..9688ed948 --- /dev/null +++ b/youtube_dl/extractor/pornovoisines.py @@ -0,0 +1,96 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import random + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, + unified_strdate, +) + + +class PornoVoisinesIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)' + + _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ + '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' + + _SERVER_NUMBERS = (1, 2) + + _TEST = { + 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', + 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', + 'info_dict': { + 'id': '1285', + 'display_id': 'recherche-appartement', + 'ext': 'mp4', + 'title': 'Recherche appartement', + 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20140925', + 'duration': 120, + 'view_count': int, + 'average_rating': float, + 'categories': ['Débutante', 'Scénario', 'Sodomie'], + 'age_limit': 18, + } + } + + @classmethod + def build_video_url(cls, num): + return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, video_id) + + video_url = self.build_video_url(video_id) + + title = self._html_search_regex( + r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL) + description = self._html_search_regex( + r'<article id="descriptif">(.+?)</article>', + webpage, "description", fatal=False, flags=re.DOTALL) + + thumbnail = self._search_regex( + r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id, + webpage, 'thumbnail', fatal=False) + if thumbnail: + thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail + + upload_date = unified_strdate(self._search_regex( + r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False)) + duration = int_or_none(self._search_regex( + 'Durée (\d+)', webpage, 'duration', fatal=False)) + view_count = int_or_none(self._search_regex( + r'(\d+) vues', webpage, 'view count', fatal=False)) + average_rating = self._search_regex( + r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False) + if average_rating: + average_rating = float_or_none(average_rating.replace(',', '.')) + + categories = self._html_search_meta( + 'keywords', webpage, 'categories', fatal=False) + if categories: + categories = [category.strip() for category in categories.split(',')] + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'average_rating': average_rating, + 'categories': categories, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 385681d06..7cc799664 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -10,6 +10,7 @@ from ..compat import ( ) from ..utils import ( unified_strdate, + int_or_none, ) @@ -24,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor): 'info_dict': { 'id': '2104602', 'ext': 'mp4', - 'title': 'Staffel 2, Episode 18 - Jahresrückblick', + 'title': 'Episode 18 - Staffel 2', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'upload_date': '20131231', 'duration': 5845.04, @@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor): urls_sources = urls_sources.values() def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate for source in urls_sources: diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py new file mode 100644 index 000000000..884c28420 --- /dev/null +++ b/youtube_dl/extractor/radiojavan.py @@ -0,0 +1,67 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import( + unified_strdate, + str_to_int, +) + + +class RadioJavanIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' + _TEST = { + 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', + 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', + 'info_dict': { + 'id': 'chaartaar-ashoobam', + 'ext': 'mp4', + 'title': 'Chaartaar - Ashoobam', + 'thumbnail': 're:^https?://.*\.jpe?g$', + 'upload_date': '20150215', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + formats = [{ + 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path, + 'format_id': '%sp' % height, + 'height': int(height), + } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] + self._sort_formats(formats) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + upload_date = unified_strdate(self._search_regex( + r'class="date_added">Date added: ([^<]+)<', + webpage, 'upload date', fatal=False)) + + view_count = str_to_int(self._search_regex( + r'class="views">Plays: ([\d,]+)', + webpage, 'view count', fatal=False)) + like_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) likes', + webpage, 'like count', fatal=False)) + dislike_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) dislikes', + webpage, 'dislike count', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 144e33982..1631faf29 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -13,7 +13,7 @@ from ..utils import ( class RaiIE(InfoExtractor): - _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' + _VALID_URL = r'(?P<url>(?P<host>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it))/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' _TESTS = [ { 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', @@ -62,34 +62,78 @@ class RaiIE(InfoExtractor): 'description': 'Edizione delle ore 20:30 ', } }, + { + 'url': 'http://www.ilcandidato.rai.it/dl/ray/media/Il-Candidato---Primo-episodio-Le-Primarie-28e5525a-b495-45e8-a7c3-bc48ba45d2b6.html', + 'md5': '02b64456f7cc09f96ff14e7dd489017e', + 'info_dict': { + 'id': '28e5525a-b495-45e8-a7c3-bc48ba45d2b6', + 'ext': 'flv', + 'title': 'Il Candidato - Primo episodio: "Le Primarie"', + 'description': 'Primo appuntamento con "Il candidato" con Filippo Timi, alias Piero Zucca presidente!', + 'uploader': 'RaiTre', + } + } ] + def _extract_relinker_url(self, webpage): + return self._proto_relative_url(self._search_regex( + [r'name="videourl" content="([^"]+)"', r'var\s+videoURL(?:_MP4)?\s*=\s*"([^"]+)"'], + webpage, 'relinker url', default=None)) + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + host = mobj.group('host') - media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON') + webpage = self._download_webpage(url, video_id) - title = media.get('name') - description = media.get('desc') - thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image') - duration = parse_duration(media.get('length')) - uploader = media.get('author') - upload_date = unified_strdate(media.get('date')) + relinker_url = self._extract_relinker_url(webpage) - formats = [] + if not relinker_url: + iframe_path = self._search_regex( + r'<iframe[^>]+src="/?(dl/[^"]+\?iframe\b[^"]*)"', + webpage, 'iframe') + webpage = self._download_webpage( + '%s/%s' % (host, iframe_path), video_id) + relinker_url = self._extract_relinker_url(webpage) - for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']: - media_url = media.get(format_id) - if not media_url: - continue - formats.append({ + relinker = self._download_json( + '%s&output=47' % relinker_url, video_id) + + media_url = relinker['video'][0] + ct = relinker.get('ct') + if ct == 'f4m': + formats = self._extract_f4m_formats( + media_url + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id) + else: + formats = [{ 'url': media_url, - 'format_id': format_id, - 'ext': 'mp4', - }) + 'format_id': ct, + }] - subtitles = self.extract_subtitles(video_id, url) + json_link = self._html_search_meta( + 'jsonlink', webpage, 'JSON link', default=None) + if json_link: + media = self._download_json( + host + json_link, video_id, 'Downloading video JSON') + title = media.get('name') + description = media.get('desc') + thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image') + duration = parse_duration(media.get('length')) + uploader = media.get('author') + upload_date = unified_strdate(media.get('date')) + else: + title = (self._search_regex( + r'var\s+videoTitolo\s*=\s*"(.+?)";', + webpage, 'title', default=None) or self._og_search_title(webpage)).replace('\\"', '"') + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + duration = None + uploader = self._html_search_meta('Editore', webpage, 'uploader') + upload_date = unified_strdate(self._html_search_meta( + 'item-date', webpage, 'upload date', default=None)) + + subtitles = self.extract_subtitles(video_id, webpage) return { 'id': video_id, @@ -103,8 +147,7 @@ class RaiIE(InfoExtractor): 'subtitles': subtitles, } - def _get_subtitles(self, video_id, url): - webpage = self._download_webpage(url, video_id) + def _get_subtitles(self, video_id, webpage): subtitles = {} m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage) if m: diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 13f071077..849300140 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -8,8 +8,10 @@ import time from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( + ExtractorError, float_or_none, remove_end, + std_headers, struct_unpack, ) @@ -84,13 +86,22 @@ class RTVEALaCartaIE(InfoExtractor): 'only_matching': True, }] + def _real_initialize(self): + user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8') + manager_info = self._download_json( + 'http://www.rtve.es/odin/loki/' + user_agent_b64, + None, 'Fetching manager info') + self._manager = manager_info['manager'] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') info = self._download_json( 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, video_id)['page']['items'][0] - png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id + if info['state'] == 'DESPU': + raise ExtractorError('The video is no longer available', expected=True) + png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) png = self._download_webpage(png_url, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py new file mode 100644 index 000000000..7f060b15b --- /dev/null +++ b/youtube_dl/extractor/spankbang.py @@ -0,0 +1,60 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SpankBangIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video' + _TEST = { + 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', + 'md5': '1cc433e1d6aa14bc376535b8679302f7', + 'info_dict': { + 'id': '3vvn', + 'ext': 'mp4', + 'title': 'fantasy solo', + 'description': 'dillion harper masturbates on a bed', + 'thumbnail': 're:^https?://.*\.jpg$', + 'uploader': 'silly2587', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + stream_key = self._html_search_regex( + r'''var\s+stream_key\s*=\s*['"](.+?)['"]''', + webpage, 'stream key') + + formats = [{ + 'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height), + 'ext': 'mp4', + 'format_id': '%sp' % height, + 'height': int(height), + } for height in re.findall(r'<span[^>]+q_(\d+)p', webpage)] + self._sort_formats(formats) + + title = self._html_search_regex( + r'(?s)<h1>(.+?)</h1>', webpage, 'title') + description = self._search_regex( + r'class="desc"[^>]*>([^<]+)', + webpage, 'description', default=None) + thumbnail = self._og_search_thumbnail(webpage) + uploader = self._search_regex( + r'class="user"[^>]*>([^<]+)', + webpage, 'uploader', fatal=False) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'formats': formats, + 'age_limit': age_limit, + } diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index a46a7ecba..1caf08cb7 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -4,7 +4,10 @@ import base64 import re from .common import InfoExtractor -from ..utils import qualities +from ..utils import ( + ExtractorError, + qualities, +) class TeamcocoIE(InfoExtractor): @@ -18,6 +21,7 @@ class TeamcocoIE(InfoExtractor): 'ext': 'mp4', 'title': 'Conan Becomes A Mary Kay Beauty Consultant', 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.', + 'duration': 504, 'age_limit': 0, } }, { @@ -28,6 +32,7 @@ class TeamcocoIE(InfoExtractor): 'ext': 'mp4', 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', 'title': 'Louis C.K. Interview Pt. 1 11/3/11', + 'duration': 288, 'age_limit': 0, } } @@ -49,35 +54,37 @@ class TeamcocoIE(InfoExtractor): video_id = self._html_search_regex( self._VIDEO_ID_REGEXES, webpage, 'video id') - embed_url = 'http://teamcoco.com/embed/v/%s' % video_id - embed = self._download_webpage( - embed_url, video_id, 'Downloading embed page') - - player_data = self._parse_json(self._search_regex( - r'Y\.Ginger\.Module\.Player(?:;var\s*player\s*=\s*new\s*m)?\((\{.*?\})\);', embed, 'player data'), video_id) + preloads = re.findall(r'"preload":\s*"([^"]+)"', webpage) + if not preloads: + raise ExtractorError('Preload information could not be extracted') + preload = max([(len(p), p) for p in preloads])[1] data = self._parse_json( - base64.b64decode(player_data['preload'].encode('ascii')).decode('utf-8'), video_id) + base64.b64decode(preload.encode('ascii')).decode('utf-8'), video_id) formats = [] get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) for filed in data['files']: - m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) - if m_format is not None: - format_id = m_format.group(1) + if filed['type'] == 'hls': + formats.extend(self._extract_m3u8_formats( + filed['url'], video_id, ext='mp4')) else: - format_id = filed['bitrate'] - tbr = ( - int(filed['bitrate']) - if filed['bitrate'].isdigit() - else None) + m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) + if m_format is not None: + format_id = m_format.group(1) + else: + format_id = filed['bitrate'] + tbr = ( + int(filed['bitrate']) + if filed['bitrate'].isdigit() + else None) - formats.append({ - 'url': filed['url'], - 'ext': 'mp4', - 'tbr': tbr, - 'format_id': format_id, - 'quality': get_quality(format_id), - }) + formats.append({ + 'url': filed['url'], + 'ext': 'mp4', + 'tbr': tbr, + 'format_id': format_id, + 'quality': get_quality(format_id), + }) self._sort_formats(formats) @@ -88,5 +95,6 @@ class TeamcocoIE(InfoExtractor): 'title': data['title'], 'thumbnail': data.get('thumb', {}).get('href'), 'description': data.get('teaser'), + 'duration': data.get('duration'), 'age_limit': self._family_friendly_search(webpage), } diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 4cec06f8b..a2dc14c2b 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -5,9 +5,8 @@ import re from .common import InfoExtractor -from ..compat import ( - compat_str, -) +from ..compat import compat_str +from ..utils import int_or_none class TEDIE(InfoExtractor): @@ -170,17 +169,41 @@ class TEDIE(InfoExtractor): finfo = self._NATIVE_FORMATS.get(f['format_id']) if finfo: f.update(finfo) - else: - # Use rtmp downloads - formats = [{ - 'format_id': f['name'], - 'url': talk_info['streamer'], - 'play_path': f['file'], - 'ext': 'flv', - 'width': f['width'], - 'height': f['height'], - 'tbr': f['bitrate'], - } for f in talk_info['resources']['rtmp']] + + for format_id, resources in talk_info['resources'].items(): + if format_id == 'h264': + for resource in resources: + bitrate = int_or_none(resource.get('bitrate')) + formats.append({ + 'url': resource['file'], + 'format_id': '%s-%sk' % (format_id, bitrate), + 'tbr': bitrate, + }) + elif format_id == 'rtmp': + streamer = talk_info.get('streamer') + if not streamer: + continue + for resource in resources: + formats.append({ + 'format_id': '%s-%s' % (format_id, resource.get('name')), + 'url': streamer, + 'play_path': resource['file'], + 'ext': 'flv', + 'width': int_or_none(resource.get('width')), + 'height': int_or_none(resource.get('height')), + 'tbr': int_or_none(resource.get('bitrate')), + }) + elif format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + resources.get('stream'), video_name, 'mp4', m3u8_id=format_id)) + + audio_download = talk_info.get('audioDownload') + if audio_download: + formats.append({ + 'url': audio_download, + 'format_id': 'audio', + }) + self._sort_formats(formats) video_id = compat_str(talk_info['id']) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 0e3e627f4..6a006b2d2 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -17,6 +17,7 @@ from ..utils import ( ExtractorError, xpath_with_ns, unsmuggle_url, + int_or_none, ) _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) @@ -28,7 +29,7 @@ class ThePlatformIE(InfoExtractor): (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? |theplatform:)(?P<id>[^/\?&]+)''' - _TEST = { + _TESTS = [{ # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ 'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true', 'info_dict': { @@ -42,7 +43,20 @@ class ThePlatformIE(InfoExtractor): # rtmp download 'skip_download': True, }, - } + }, { + # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/ + 'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT', + 'info_dict': { + 'id': '22d_qsQ6MIRT', + 'ext': 'flv', + 'description': 'md5:ac330c9258c04f9d7512cf26b9595409', + 'title': 'Tesla Model S: A second step towards a cleaner motoring future', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }] @staticmethod def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): @@ -115,7 +129,7 @@ class ThePlatformIE(InfoExtractor): head = meta.find(_x('smil:head')) body = meta.find(_x('smil:body')) - f4m_node = body.find(_x('smil:seq//smil:video')) + f4m_node = body.find(_x('smil:seq//smil:video')) or body.find(_x('smil:seq/smil:video')) if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: f4m_url = f4m_node.attrib['src'] if 'manifest.f4m?' not in f4m_url: @@ -127,13 +141,17 @@ class ThePlatformIE(InfoExtractor): else: formats = [] switch = body.find(_x('smil:switch')) + if switch is None: + switch = body.find(_x('smil:par//smil:switch')) or body.find(_x('smil:par/smil:switch')) + if switch is None: + switch = body.find(_x('smil:par')) if switch is not None: base_url = head.find(_x('smil:meta')).attrib['base'] for f in switch.findall(_x('smil:video')): attr = f.attrib - width = int(attr['width']) - height = int(attr['height']) - vbr = int(attr['system-bitrate']) // 1000 + width = int_or_none(attr.get('width')) + height = int_or_none(attr.get('height')) + vbr = int_or_none(attr.get('system-bitrate'), 1000) format_id = '%dx%d_%dk' % (width, height, vbr) formats.append({ 'format_id': format_id, @@ -145,10 +163,10 @@ class ThePlatformIE(InfoExtractor): 'vbr': vbr, }) else: - switch = body.find(_x('smil:seq//smil:switch')) + switch = body.find(_x('smil:seq//smil:switch')) or body.find(_x('smil:seq/smil:switch')) for f in switch.findall(_x('smil:video')): attr = f.attrib - vbr = int(attr['system-bitrate']) // 1000 + vbr = int_or_none(attr.get('system-bitrate'), 1000) ext = determine_ext(attr['src']) if ext == 'once': ext = 'mp4' @@ -167,5 +185,5 @@ class ThePlatformIE(InfoExtractor): 'formats': formats, 'description': info['description'], 'thumbnail': info['defaultThumbnailUrl'], - 'duration': info['duration'] // 1000, + 'duration': int_or_none(info.get('duration'), 1000), } diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py new file mode 100644 index 000000000..bba25bb58 --- /dev/null +++ b/youtube_dl/extractor/udn.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +from .common import InfoExtractor +from ..utils import js_to_json +from ..compat import compat_urlparse + + +class UDNEmbedIE(InfoExtractor): + _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://video.udn.com/embed/news/300040', + 'md5': 'de06b4c90b042c128395a88f0384817e', + 'info_dict': { + 'id': '300040', + 'ext': 'mp4', + 'title': '生物老師男變女 全校挺"做自己"', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': '//video.udn.com/embed/news/300040', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + page = self._download_webpage(url, video_id) + + options = json.loads(js_to_json(self._html_search_regex( + r'var options\s*=\s*([^;]+);', page, 'video urls dictionary'))) + + video_urls = options['video'] + + if video_urls.get('youtube'): + return self.url_result(video_urls.get('youtube'), 'Youtube') + + try: + del video_urls['youtube'] + except KeyError: + pass + + formats = [{ + 'url': self._download_webpage( + compat_urlparse.urljoin(url, api_url), video_id, + 'retrieve url for %s video' % video_type), + 'format_id': video_type, + 'preference': 0 if video_type == 'mp4' else -1, + } for video_type, api_url in video_urls.items()] + + self._sort_formats(formats) + + thumbnail = None + + if options.get('gallery') and len(options['gallery']): + thumbnail = options['gallery'][0].get('original') + + return { + 'id': video_id, + 'formats': formats, + 'title': options['title'], + 'thumbnail': thumbnail + } diff --git a/youtube_dl/extractor/vine.py b/youtube_dl/extractor/vine.py index c3187cfeb..d4f5a991e 100644 --- a/youtube_dl/extractor/vine.py +++ b/youtube_dl/extractor/vine.py @@ -9,8 +9,8 @@ from ..utils import unified_strdate class VineIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?vine\.co/(?:v|oembed)/(?P<id>\w+)' + _TESTS = [{ 'url': 'https://vine.co/v/b9KOOWX7HUx', 'md5': '2f36fed6235b16da96ce9b4dc890940d', 'info_dict': { @@ -23,21 +23,53 @@ class VineIE(InfoExtractor): 'uploader': 'Jack Dorsey', 'uploader_id': '76', }, - } + }, { + 'url': 'https://vine.co/v/MYxVapFvz2z', + 'md5': '7b9a7cbc76734424ff942eb52c8f1065', + 'info_dict': { + 'id': 'MYxVapFvz2z', + 'ext': 'mp4', + 'title': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14', + 'alt_title': 'Vine by Luna', + 'description': 'Fuck Da Police #Mikebrown #justice #ferguson #prayforferguson #protesting #NMOS14', + 'upload_date': '20140815', + 'uploader': 'Luna', + 'uploader_id': '1102363502380728320', + }, + }, { + 'url': 'https://vine.co/v/bxVjBbZlPUH', + 'md5': 'ea27decea3fa670625aac92771a96b73', + 'info_dict': { + 'id': 'bxVjBbZlPUH', + 'ext': 'mp4', + 'title': '#mw3 #ac130 #killcam #angelofdeath', + 'alt_title': 'Vine by Z3k3', + 'description': '#mw3 #ac130 #killcam #angelofdeath', + 'upload_date': '20130430', + 'uploader': 'Z3k3', + 'uploader_id': '936470460173008896', + }, + }, { + 'url': 'https://vine.co/oembed/MYxVapFvz2z.json', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('https://vine.co/v/' + video_id, video_id) - data = json.loads(self._html_search_regex( - r'window\.POST_DATA = { %s: ({.+?}) }' % video_id, webpage, 'vine data')) + data = self._parse_json( + self._html_search_regex( + r'window\.POST_DATA = { %s: ({.+?}) };\s*</script>' % video_id, + webpage, 'vine data'), + video_id) formats = [{ 'format_id': '%(format)s-%(rate)s' % f, 'vcodec': f['format'], 'quality': f['rate'], 'url': f['videoUrl'], - } for f in data['videoUrls'] if f.get('rate')] + } for f in data['videoUrls']] self._sort_formats(formats) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5488101e1..2774ec30b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -495,7 +495,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': '孫艾倫', 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', }, - } + }, + # url_encoded_fmt_stream_map is empty string + { + 'url': 'qEJwOuvDf7I', + 'info_dict': { + 'id': 'qEJwOuvDf7I', + 'ext': 'mp4', + 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', + 'description': '', + 'upload_date': '20150404', + 'uploader_id': 'spbelect', + 'uploader': 'Наблюдатели Петербурга', + }, + 'params': { + 'skip_download': 'requires avconv', + } + }, ] def __init__(self, *args, **kwargs): @@ -772,33 +788,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): errnote='Could not download DASH manifest') formats = [] - for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): - url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') - if url_el is None: - continue - format_id = r.attrib['id'] - video_url = url_el.text - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) - f = { - 'format_id': format_id, - 'url': video_url, - 'width': int_or_none(r.attrib.get('width')), - 'height': int_or_none(r.attrib.get('height')), - 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), - 'asr': int_or_none(r.attrib.get('audioSamplingRate')), - 'filesize': filesize, - 'fps': int_or_none(r.attrib.get('frameRate')), - } - try: - existing_format = next( - fo for fo in formats - if fo['format_id'] == format_id) - except StopIteration: - full_info = self._formats.get(format_id, {}).copy() - full_info.update(f) - formats.append(full_info) - else: - existing_format.update(f) + for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'): + mime_type = a.attrib.get('mimeType') + for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'): + url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') + if url_el is None: + continue + if mime_type == 'text/vtt': + # TODO implement WebVTT downloading + pass + elif mime_type.startswith('audio/') or mime_type.startswith('video/'): + format_id = r.attrib['id'] + video_url = url_el.text + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + f = { + 'format_id': format_id, + 'url': video_url, + 'width': int_or_none(r.attrib.get('width')), + 'height': int_or_none(r.attrib.get('height')), + 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), + 'asr': int_or_none(r.attrib.get('audioSamplingRate')), + 'filesize': filesize, + 'fps': int_or_none(r.attrib.get('frameRate')), + } + try: + existing_format = next( + fo for fo in formats + if fo['format_id'] == format_id) + except StopIteration: + full_info = self._formats.get(format_id, {}).copy() + full_info.update(f) + formats.append(full_info) + else: + existing_format.update(f) + else: + self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats def _real_extract(self, url): @@ -855,7 +879,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): args = ytplayer_config['args'] # Convert to the same format returned by compat_parse_qs video_info = dict((k, [v]) for k, v in args.items()) - if 'url_encoded_fmt_stream_map' not in args: + if not args.get('url_encoded_fmt_stream_map'): raise ValueError('No stream_map present') # caught below except ValueError: # We fallback to the get_video_info pages (used by the embed page) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 35c7e5fb3..11603f60d 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -13,6 +13,7 @@ from .compat import ( compat_kwargs, ) from .utils import ( + preferredencoding, write_string, ) from .version import __version__ @@ -120,19 +121,19 @@ def parseOpts(overrideArguments=None): general.add_option( '-h', '--help', action='help', - help='print this help text and exit') + help='Print this help text and exit') general.add_option( '-v', '--version', action='version', - help='print program version and exit') + help='Print program version and exit') general.add_option( '-U', '--update', action='store_true', dest='update_self', - help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') + help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') general.add_option( '-i', '--ignore-errors', action='store_true', dest='ignoreerrors', default=False, - help='continue on download errors, for example to skip unavailable videos in a playlist') + help='Continue on download errors, for example to skip unavailable videos in a playlist') general.add_option( '--abort-on-error', action='store_false', dest='ignoreerrors', @@ -140,7 +141,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--dump-user-agent', action='store_true', dest='dump_user_agent', default=False, - help='display the current browser identification') + help='Display the current browser identification') general.add_option( '--list-extractors', action='store_true', dest='list_extractors', default=False, @@ -152,7 +153,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') general.add_option( '--ignore-config', action='store_true', @@ -169,7 +170,7 @@ def parseOpts(overrideArguments=None): '--no-color', '--no-colors', action='store_true', dest='no_color', default=False, - help='Do not emit color codes in output.') + help='Do not emit color codes in output') network = optparse.OptionGroup(parser, 'Network Options') network.add_option( @@ -206,23 +207,23 @@ def parseOpts(overrideArguments=None): selection.add_option( '--playlist-start', dest='playliststart', metavar='NUMBER', default=1, type=int, - help='playlist video to start at (default is %default)') + help='Playlist video to start at (default is %default)') selection.add_option( '--playlist-end', dest='playlistend', metavar='NUMBER', default=None, type=int, - help='playlist video to end at (default is last)') + help='Playlist video to end at (default is last)') selection.add_option( '--playlist-items', dest='playlist_items', metavar='ITEM_SPEC', default=None, - help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') + help='Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', - help='download only matching titles (regex or caseless sub-string)') + help='Download only matching titles (regex or caseless sub-string)') selection.add_option( '--reject-title', dest='rejecttitle', metavar='REGEX', - help='skip download for matching titles (regex or caseless sub-string)') + help='Skip download for matching titles (regex or caseless sub-string)') selection.add_option( '--max-downloads', dest='max_downloads', metavar='NUMBER', type=int, default=None, @@ -238,19 +239,19 @@ def parseOpts(overrideArguments=None): selection.add_option( '--date', metavar='DATE', dest='date', default=None, - help='download only videos uploaded in this date') + help='Download only videos uploaded in this date') selection.add_option( '--datebefore', metavar='DATE', dest='datebefore', default=None, - help='download only videos uploaded on or before this date (i.e. inclusive)') + help='Download only videos uploaded on or before this date (i.e. inclusive)') selection.add_option( '--dateafter', metavar='DATE', dest='dateafter', default=None, - help='download only videos uploaded on or after this date (i.e. inclusive)') + help='Download only videos uploaded on or after this date (i.e. inclusive)') selection.add_option( '--min-views', metavar='COUNT', dest='min_views', default=None, type=int, - help='Do not download any videos with less than COUNT views',) + help='Do not download any videos with less than COUNT views') selection.add_option( '--max-views', metavar='COUNT', dest='max_views', default=None, type=int, @@ -259,7 +260,7 @@ def parseOpts(overrideArguments=None): '--match-filter', metavar='FILTER', dest='match_filter', default=None, help=( - '(Experimental) Generic video filter. ' + 'Generic video filter (experimental). ' 'Specify any key (see help for -o for a list of available keys) to' ' match if the key is present, ' '!key to check if the key is not present,' @@ -277,15 +278,15 @@ def parseOpts(overrideArguments=None): selection.add_option( '--no-playlist', action='store_true', dest='noplaylist', default=False, - help='If the URL refers to a video and a playlist, download only the video.') + help='Download only the video, if the URL refers to a video and a playlist.') selection.add_option( '--yes-playlist', action='store_false', dest='noplaylist', default=False, - help='If the URL refers to a video and a playlist, download the playlist.') + help='Download the playlist, if the URL refers to a video and a playlist.') selection.add_option( '--age-limit', metavar='YEARS', dest='age_limit', default=None, type=int, - help='download only videos suitable for the given age') + help='Download only videos suitable for the given age') selection.add_option( '--download-archive', metavar='FILE', dest='download_archive', @@ -299,30 +300,30 @@ def parseOpts(overrideArguments=None): authentication.add_option( '-u', '--username', dest='username', metavar='USERNAME', - help='login with this account ID') + help='Login with this account ID') authentication.add_option( '-p', '--password', dest='password', metavar='PASSWORD', - help='account password. If this option is left out, youtube-dl will ask interactively.') + help='Account password. If this option is left out, youtube-dl will ask interactively.') authentication.add_option( '-2', '--twofactor', dest='twofactor', metavar='TWOFACTOR', - help='two-factor auth code') + help='Two-factor auth code') authentication.add_option( '-n', '--netrc', action='store_true', dest='usenetrc', default=False, - help='use .netrc authentication data') + help='Use .netrc authentication data') authentication.add_option( '--video-password', dest='videopassword', metavar='PASSWORD', - help='video password (vimeo, smotri)') + help='Video password (vimeo, smotri)') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( '-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, help=( - 'video format code, specify the order of preference using' + 'Video format code, specify the order of preference using' ' slashes, as in -f 22/17/18 . ' ' Instead of format codes, you can select by extension for the ' 'extensions aac, m4a, mp3, mp4, ogg, wav, webm. ' @@ -350,19 +351,19 @@ def parseOpts(overrideArguments=None): video_format.add_option( '--all-formats', action='store_const', dest='format', const='all', - help='download all available video formats') + help='Download all available video formats') video_format.add_option( '--prefer-free-formats', action='store_true', dest='prefer_free_formats', default=False, - help='prefer free video formats unless a specific one is requested') + help='Prefer free video formats unless a specific one is requested') video_format.add_option( '--max-quality', action='store', dest='format_limit', metavar='FORMAT', - help='highest quality format to download') + help='Highest quality format to download') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', - help='list all available formats') + help='List all available formats') video_format.add_option( '--youtube-include-dash-manifest', action='store_true', dest='youtube_include_dash_manifest', default=True, @@ -382,46 +383,46 @@ def parseOpts(overrideArguments=None): subtitles.add_option( '--write-sub', '--write-srt', action='store_true', dest='writesubtitles', default=False, - help='write subtitle file') + help='Write subtitle file') subtitles.add_option( '--write-auto-sub', '--write-automatic-sub', action='store_true', dest='writeautomaticsub', default=False, - help='write automatic subtitle file (youtube only)') + help='Write automatic subtitle file (YouTube only)') subtitles.add_option( '--all-subs', action='store_true', dest='allsubtitles', default=False, - help='downloads all the available subtitles of the video') + help='Download all the available subtitles of the video') subtitles.add_option( '--list-subs', action='store_true', dest='listsubtitles', default=False, - help='lists all available subtitles for the video') + help='List all available subtitles for the video') subtitles.add_option( '--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', default='best', - help='subtitle format, accepts formats preference, for example: "ass/srt/best"') + help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"') subtitles.add_option( '--sub-lang', '--sub-langs', '--srt-lang', action='callback', dest='subtitleslangs', metavar='LANGS', type='str', default=[], callback=_comma_separated_values_options_callback, - help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') + help='Languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') downloader = optparse.OptionGroup(parser, 'Download Options') downloader.add_option( '-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', - help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') + help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)') downloader.add_option( '-R', '--retries', dest='retries', metavar='RETRIES', default=10, - help='number of retries (default is %default), or "infinite".') + help='Number of retries (default is %default), or "infinite".') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', - help='size of download buffer (e.g. 1024 or 16K) (default is %default)') + help='Size of download buffer (e.g. 1024 or 16K) (default is %default)') downloader.add_option( '--no-resize-buffer', action='store_true', dest='noresizebuffer', default=False, - help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.') + help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.') downloader.add_option( '--test', action='store_true', dest='test', default=False, @@ -433,11 +434,11 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--xattr-set-filesize', dest='xattr_set_filesize', action='store_true', - help='(experimental) set file xattribute ytdl.filesize with expected filesize') + help='Set file xattribute ytdl.filesize with expected filesize (experimental)') downloader.add_option( '--hls-prefer-native', dest='hls_prefer_native', action='store_true', - help='(experimental) Use the native HLS downloader instead of ffmpeg.') + help='Use the native HLS downloader instead of ffmpeg (experimental)') downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', @@ -446,7 +447,7 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--external-downloader-args', dest='external_downloader_args', metavar='ARGS', - help='Give these arguments to the external downloader.') + help='Give these arguments to the external downloader') workarounds = optparse.OptionGroup(parser, 'Workarounds') workarounds.add_option( @@ -456,7 +457,7 @@ def parseOpts(overrideArguments=None): workarounds.add_option( '--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, - help='Suppress HTTPS certificate validation.') + help='Suppress HTTPS certificate validation') workarounds.add_option( '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', @@ -464,16 +465,16 @@ def parseOpts(overrideArguments=None): workarounds.add_option( '--user-agent', metavar='UA', dest='user_agent', - help='specify a custom user agent') + help='Specify a custom user agent') workarounds.add_option( '--referer', metavar='URL', dest='referer', default=None, - help='specify a custom referer, use if the video access is restricted to one domain', + help='Specify a custom referer, use if the video access is restricted to one domain', ) workarounds.add_option( '--add-header', metavar='FIELD:VALUE', dest='headers', action='append', - help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', + help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', ) workarounds.add_option( '--bidi-workaround', @@ -488,7 +489,7 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-q', '--quiet', action='store_true', dest='quiet', default=False, - help='activates quiet mode') + help='Activate quiet mode') verbosity.add_option( '--no-warnings', dest='no_warnings', action='store_true', default=False, @@ -496,51 +497,51 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-s', '--simulate', action='store_true', dest='simulate', default=False, - help='do not download the video and do not write anything to disk',) + help='Do not download the video and do not write anything to disk') verbosity.add_option( '--skip-download', action='store_true', dest='skip_download', default=False, - help='do not download the video',) + help='Do not download the video') verbosity.add_option( '-g', '--get-url', action='store_true', dest='geturl', default=False, - help='simulate, quiet but print URL') + help='Simulate, quiet but print URL') verbosity.add_option( '-e', '--get-title', action='store_true', dest='gettitle', default=False, - help='simulate, quiet but print title') + help='Simulate, quiet but print title') verbosity.add_option( '--get-id', action='store_true', dest='getid', default=False, - help='simulate, quiet but print id') + help='Simulate, quiet but print id') verbosity.add_option( '--get-thumbnail', action='store_true', dest='getthumbnail', default=False, - help='simulate, quiet but print thumbnail URL') + help='Simulate, quiet but print thumbnail URL') verbosity.add_option( '--get-description', action='store_true', dest='getdescription', default=False, - help='simulate, quiet but print video description') + help='Simulate, quiet but print video description') verbosity.add_option( '--get-duration', action='store_true', dest='getduration', default=False, - help='simulate, quiet but print video length') + help='Simulate, quiet but print video length') verbosity.add_option( '--get-filename', action='store_true', dest='getfilename', default=False, - help='simulate, quiet but print output filename') + help='Simulate, quiet but print output filename') verbosity.add_option( '--get-format', action='store_true', dest='getformat', default=False, - help='simulate, quiet but print output format') + help='Simulate, quiet but print output format') verbosity.add_option( '-j', '--dump-json', action='store_true', dest='dumpjson', default=False, - help='simulate, quiet but print JSON information. See --output for a description of available keys.') + help='Simulate, quiet but print JSON information. See --output for a description of available keys.') verbosity.add_option( '-J', '--dump-single-json', action='store_true', dest='dump_single_json', default=False, - help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.') + help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.') verbosity.add_option( '--print-json', action='store_true', dest='print_json', default=False, @@ -549,23 +550,23 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '--newline', action='store_true', dest='progress_with_newline', default=False, - help='output progress bar as new lines') + help='Output progress bar as new lines') verbosity.add_option( '--no-progress', action='store_true', dest='noprogress', default=False, - help='do not print progress bar') + help='Do not print progress bar') verbosity.add_option( '--console-title', action='store_true', dest='consoletitle', default=False, - help='display progress in console titlebar') + help='Display progress in console titlebar') verbosity.add_option( '-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print various debugging information') + help='Print various debugging information') verbosity.add_option( '--dump-pages', '--dump-intermediate-pages', action='store_true', dest='dump_intermediate_pages', default=False, - help='print downloaded pages to debug problems (very verbose)') + help='Print downloaded pages to debug problems (very verbose)') verbosity.add_option( '--write-pages', action='store_true', dest='write_pages', default=False, @@ -581,29 +582,29 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-C', '--call-home', dest='call_home', action='store_true', default=False, - help='Contact the youtube-dl server for debugging.') + help='Contact the youtube-dl server for debugging') verbosity.add_option( '--no-call-home', dest='call_home', action='store_false', default=False, - help='Do NOT contact the youtube-dl server for debugging.') + help='Do NOT contact the youtube-dl server for debugging') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') filesystem.add_option( '-a', '--batch-file', dest='batchfile', metavar='FILE', - help='file containing URLs to download (\'-\' for stdin)') + help='File containing URLs to download (\'-\' for stdin)') filesystem.add_option( '--id', default=False, - action='store_true', dest='useid', help='use only video ID in file name') + action='store_true', dest='useid', help='Use only video ID in file name') filesystem.add_option( '-o', '--output', dest='outtmpl', metavar='TEMPLATE', - help=('output filename template. Use %(title)s to get the title, ' + help=('Output filename template. Use %(title)s to get the title, ' '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' '%(autonumber)s to get an automatically incremented number, ' '%(ext)s for the filename extension, ' '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' - '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' + '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), ' '%(upload_date)s for the upload date (YYYYMMDD), ' '%(extractor)s for the provider (youtube, metacafe, etc), ' '%(id)s for the video id, ' @@ -617,7 +618,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--autonumber-size', dest='autonumber_size', metavar='NUMBER', - help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') + help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') filesystem.add_option( '--restrict-filenames', action='store_true', dest='restrictfilenames', default=False, @@ -625,55 +626,55 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-A', '--auto-number', action='store_true', dest='autonumber', default=False, - help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000') + help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000') filesystem.add_option( '-t', '--title', action='store_true', dest='usetitle', default=False, - help='[deprecated] use title in file name (default)') + help='[deprecated] Use title in file name (default)') filesystem.add_option( '-l', '--literal', default=False, action='store_true', dest='usetitle', - help='[deprecated] alias of --title') + help='[deprecated] Alias of --title') filesystem.add_option( '-w', '--no-overwrites', action='store_true', dest='nooverwrites', default=False, - help='do not overwrite files') + help='Do not overwrite files') filesystem.add_option( '-c', '--continue', action='store_true', dest='continue_dl', default=True, - help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.') + help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.') filesystem.add_option( '--no-continue', action='store_false', dest='continue_dl', - help='do not resume partially downloaded files (restart from beginning)') + help='Do not resume partially downloaded files (restart from beginning)') filesystem.add_option( '--no-part', action='store_true', dest='nopart', default=False, - help='do not use .part files - write directly into output file') + help='Do not use .part files - write directly into output file') filesystem.add_option( '--no-mtime', action='store_false', dest='updatetime', default=True, - help='do not use the Last-modified header to set the file modification time') + help='Do not use the Last-modified header to set the file modification time') filesystem.add_option( '--write-description', action='store_true', dest='writedescription', default=False, - help='write video description to a .description file') + help='Write video description to a .description file') filesystem.add_option( '--write-info-json', action='store_true', dest='writeinfojson', default=False, - help='write video metadata to a .info.json file') + help='Write video metadata to a .info.json file') filesystem.add_option( '--write-annotations', action='store_true', dest='writeannotations', default=False, - help='write video annotations to a .annotation file') + help='Write video annotations to a .annotation file') filesystem.add_option( '--load-info', dest='load_info_filename', metavar='FILE', - help='json file containing the video information (created with the "--write-json" option)') + help='JSON file containing the video information (created with the "--write-info-json" option)') filesystem.add_option( '--cookies', dest='cookiefile', metavar='FILE', - help='file to read cookies from and dump cookie jar in') + help='File to read cookies from and dump cookie jar in') filesystem.add_option( '--cache-dir', dest='cachedir', default=None, metavar='DIR', help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') @@ -689,11 +690,11 @@ def parseOpts(overrideArguments=None): thumbnail.add_option( '--write-thumbnail', action='store_true', dest='writethumbnail', default=False, - help='write thumbnail image to disk') + help='Write thumbnail image to disk') thumbnail.add_option( '--write-all-thumbnails', action='store_true', dest='write_all_thumbnails', default=False, - help='write all thumbnail image formats to disk') + help='Write all thumbnail image formats to disk') thumbnail.add_option( '--list-thumbnails', action='store_true', dest='list_thumbnails', default=False, @@ -703,14 +704,14 @@ def parseOpts(overrideArguments=None): postproc.add_option( '-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') + help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') + help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') + help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') postproc.add_option( '--recode-video', metavar='FORMAT', dest='recodevideo', default=None, @@ -718,27 +719,27 @@ def parseOpts(overrideArguments=None): postproc.add_option( '-k', '--keep-video', action='store_true', dest='keepvideo', default=False, - help='keeps the video file on disk after the post-processing; the video is erased by default') + help='Keep the video file on disk after the post-processing; the video is erased by default') postproc.add_option( '--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, - help='do not overwrite post-processed files; the post-processed files are overwritten by default') + help='Do not overwrite post-processed files; the post-processed files are overwritten by default') postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='embed subtitles in the video (only for mp4 videos)') + help='Embed subtitles in the video (only for mp4 videos)') postproc.add_option( '--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, - help='embed thumbnail in the audio as cover art') + help='Embed thumbnail in the audio as cover art') postproc.add_option( '--add-metadata', action='store_true', dest='addmetadata', default=False, - help='write metadata to the video file') + help='Write metadata to the video file') postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', - help='parse additional metadata like song title / artist from the video title. ' + help='Parse additional metadata like song title / artist from the video title. ' 'The format syntax is the same as --output, ' 'the parsed parameters replace existing values. ' 'Additional templates: %(album), %(artist). ' @@ -747,7 +748,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--xattrs', action='store_true', dest='xattrs', default=False, - help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') + help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)') postproc.add_option( '--fixup', metavar='POLICY', dest='fixup', default='detect_or_warn', @@ -793,21 +794,22 @@ def parseOpts(overrideArguments=None): if opts.verbose: write_string('[debug] Override config: ' + repr(overrideArguments) + '\n') else: - command_line_conf = sys.argv[1:] - # Workaround for Python 2.x, where argv is a byte list - if sys.version_info < (3,): - command_line_conf = [ - a.decode('utf-8', 'replace') for a in command_line_conf] + def compat_conf(conf): + if sys.version_info < (3,): + return [a.decode(preferredencoding(), 'replace') for a in conf] + return conf + + command_line_conf = compat_conf(sys.argv[1:]) if '--ignore-config' in command_line_conf: system_conf = [] user_conf = [] else: - system_conf = _readOptions('/etc/youtube-dl.conf') + system_conf = compat_conf(_readOptions('/etc/youtube-dl.conf')) if '--ignore-config' in system_conf: user_conf = [] else: - user_conf = _readUserConf() + user_conf = compat_conf(_readUserConf()) argv = system_conf + user_conf + command_line_conf opts, args = parser.parse_args(argv) diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py index e54ae678d..ef9fdfa19 100644 --- a/youtube_dl/postprocessor/common.py +++ b/youtube_dl/postprocessor/common.py @@ -1,6 +1,11 @@ from __future__ import unicode_literals -from ..utils import PostProcessingError +import os + +from ..utils import ( + PostProcessingError, + encodeFilename, +) class PostProcessor(object): @@ -46,6 +51,12 @@ class PostProcessor(object): """ return None, information # by default, keep file and do nothing + def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'): + try: + os.utime(encodeFilename(path), (atime, mtime)) + except Exception: + self._downloader.report_warning(errnote) + class AudioConversionError(PostProcessingError): pass diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 55adf9685..8e99a3c2c 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -116,6 +116,10 @@ class FFmpegPostProcessor(PostProcessor): def executable(self): return self._paths[self.basename] + @property + def probe_available(self): + return self.probe_basename is not None + @property def probe_executable(self): return self._paths[self.probe_basename] @@ -142,7 +146,8 @@ class FFmpegPostProcessor(PostProcessor): stderr = stderr.decode('utf-8', 'replace') msg = stderr.strip().split('\n')[-1] raise FFmpegPostProcessorError(msg) - os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime)) + self.try_utime(out_path, oldest_mtime, oldest_mtime) + if self._deletetempfiles: for ipath in input_paths: os.remove(ipath) @@ -168,7 +173,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): def get_audio_codec(self, path): - if not self.probe_executable: + if not self.probe_available: raise PostProcessingError('ffprobe or avprobe not found. Please install one.') try: cmd = [ @@ -276,10 +281,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # Try to update the date time for extracted audio file. if information.get('filetime') is not None: - try: - os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) - except Exception: - self._downloader.report_warning('Cannot update utime of audio file') + self.try_utime( + new_path, time.time(), information['filetime'], + errnote='Cannot update utime of audio file') information['filepath'] = new_path return self._nopostoverwrites, information diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 90e0ed9ab..52f0dd09a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -730,7 +730,8 @@ def unified_strdate(date_str, day_first=True): # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 - date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) + if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str): + date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) # Remove AM/PM + timezone date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) @@ -759,6 +760,7 @@ def unified_strdate(date_str, day_first=True): ] if day_first: format_expressions.extend([ + '%d-%m-%Y', '%d.%m.%Y', '%d/%m/%Y', '%d/%m/%y', @@ -766,6 +768,7 @@ def unified_strdate(date_str, day_first=True): ]) else: format_expressions.extend([ + '%m-%d-%Y', '%m.%d.%Y', '%m/%d/%Y', '%m/%d/%y', @@ -1577,7 +1580,7 @@ def js_to_json(code): '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| [a-zA-Z_][.a-zA-Z_0-9]* ''', fix_kv, code) - res = re.sub(r',(\s*\])', lambda m: m.group(1), res) + res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res) return res diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e1c385bec..1095fea2f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2015.04.03' +__version__ = '2015.04.09'