From 575dad3c9842f333c4af27563a26bddaf0015fa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Roman=20Le=20N=C3=A9grate?= Date: Sun, 22 Mar 2015 20:25:44 +0100 Subject: [PATCH 01/95] [pornovoisines] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/pornovoisines.py | 101 ++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 youtube_dl/extractor/pornovoisines.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index d73826d44..17d075ec8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -382,6 +382,7 @@ from .pornhub import ( PornHubPlaylistIE, ) from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py new file mode 100644 index 000000000..efbb6a818 --- /dev/null +++ b/youtube_dl/extractor/pornovoisines.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import datetime +import random + +from ..compat import compat_urllib_parse +from .common import InfoExtractor + +class PornoVoisinesIE(InfoExtractor): + _VALID_URL = r'^((?:http://)?(?:www\.)?pornovoisines.com)/showvideo/(\d+)/([^/]+)' + + VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ + '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' + + SERVER_NUMBERS = (1, 2) + + _TEST = { + 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', + 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', + 'info_dict': { + 'id': '1285', + 'display_id': 'recherche-appartement', + 'ext': 'mp4', + 'title': "Recherche appartement", + 'upload_date': '20140925', + 'view_count': int, + 'duration': 120, + 'categories': ["Débutante", "Scénario", "Sodomie"], + 'description': 're:^Pour la .+ original...$', + 'thumbnail': 're:^http://', + 'uploader': "JMTV", + 'average_rating': float, + 'comment_count': int, + 'age_limit': 18, + } + } + + @classmethod + def build_video_url(cls, id): + server_nr = random.choice(cls.SERVER_NUMBERS) + return cls.VIDEO_URL_TEMPLATE % (server_nr, id) + + @staticmethod + def parse_upload_date(str): + return datetime.datetime.strptime(str, "%d-%m-%Y").strftime("%Y%m%d") + + @staticmethod + def parse_categories(str): + return map(lambda s: s.strip(), str.split(',')) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + url_prefix = mobj.group(1) + id = mobj.group(2) + display_id = mobj.group(3) + + webpage = self._download_webpage(url, id) + + title = self._html_search_regex(r'

(.+?)

', webpage, 'title', + flags=re.DOTALL) + url = self.build_video_url(id) + upload_date = self.parse_upload_date( + self._search_regex(r'Publié le (\d\d-\d\d-\d{4})', webpage, + 'upload date')) + view_count = int(self._search_regex(r'(\d+) vues', webpage, 'view count')) + duration = int(self._search_regex('Durée (\d+)', webpage, 'duration')) + categories = self.parse_categories(self._html_search_regex( + r'
  • (.+?)
  • ', webpage, "categories", + flags=re.DOTALL)) + description = self._html_search_regex( + r'
    (.+?)
    ', webpage, "description", + flags=re.DOTALL) + thumbnail = url_prefix + self._html_search_regex(re.compile( + '
    .*?(.+?)', webpage, + "uploader", flags=re.DOTALL)) + average_rating = float(self._search_regex(r'Note : (\d+,\d+)', + webpage, "average rating").replace(',', '.')) + comment_count = int(self._search_regex(r'\((\d+)\)', webpage, + "comment count")) + + return { + 'id': id, + 'display_id': display_id, + 'url': url, + 'title': title, + 'upload_date': upload_date, + 'view_count': view_count, + 'duration': duration, + 'categories': categories, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'average_rating': average_rating, + 'comment_count': comment_count, + 'age_limit': 18, + } From 17941321ab0b3f9548d1f65e3f9d69e8cd01c0a3 Mon Sep 17 00:00:00 2001 From: testbonn Date: Wed, 25 Mar 2015 11:02:55 +0100 Subject: [PATCH 02/95] Clean up of --help output For consistency and readability --- youtube_dl/options.py | 196 +++++++++++++++++++++--------------------- 1 file changed, 98 insertions(+), 98 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 35c7e5fb3..68193a271 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -120,19 +120,19 @@ def parseOpts(overrideArguments=None): general.add_option( '-h', '--help', action='help', - help='print this help text and exit') + help='Print this help text and exit') general.add_option( '-v', '--version', action='version', - help='print program version and exit') + help='Print program version and exit') general.add_option( '-U', '--update', action='store_true', dest='update_self', - help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') + help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') general.add_option( '-i', '--ignore-errors', action='store_true', dest='ignoreerrors', default=False, - help='continue on download errors, for example to skip unavailable videos in a playlist') + help='Continue on download errors, for example to skip unavailable videos in a playlist') general.add_option( '--abort-on-error', action='store_false', dest='ignoreerrors', @@ -140,7 +140,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--dump-user-agent', action='store_true', dest='dump_user_agent', default=False, - help='display the current browser identification') + help='Display the current browser identification') general.add_option( '--list-extractors', action='store_true', dest='list_extractors', default=False, @@ -152,7 +152,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.') general.add_option( '--ignore-config', action='store_true', @@ -169,7 +169,7 @@ def parseOpts(overrideArguments=None): '--no-color', '--no-colors', action='store_true', dest='no_color', default=False, - help='Do not emit color codes in output.') + help='Do not emit color codes in output') network = optparse.OptionGroup(parser, 'Network Options') network.add_option( @@ -206,23 +206,23 @@ def parseOpts(overrideArguments=None): selection.add_option( '--playlist-start', dest='playliststart', metavar='NUMBER', default=1, type=int, - help='playlist video to start at (default is %default)') + help='Playlist video to start at (default is %default)') selection.add_option( '--playlist-end', dest='playlistend', metavar='NUMBER', default=None, type=int, - help='playlist video to end at (default is last)') + help='Playlist video to end at (default is last)') selection.add_option( '--playlist-items', dest='playlist_items', metavar='ITEM_SPEC', default=None, - help='playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') + help='Playlist video items to download. Specify indices of the videos in the playlist seperated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify range: "--playlist-items 1-3,7,10-13", it will download the videos at index 1, 2, 3, 7, 10, 11, 12 and 13.') selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', - help='download only matching titles (regex or caseless sub-string)') + help='Download only matching titles (regex or caseless sub-string)') selection.add_option( '--reject-title', dest='rejecttitle', metavar='REGEX', - help='skip download for matching titles (regex or caseless sub-string)') + help='Skip download for matching titles (regex or caseless sub-string)') selection.add_option( '--max-downloads', dest='max_downloads', metavar='NUMBER', type=int, default=None, @@ -238,19 +238,19 @@ def parseOpts(overrideArguments=None): selection.add_option( '--date', metavar='DATE', dest='date', default=None, - help='download only videos uploaded in this date') + help='Download only videos uploaded in this date') selection.add_option( '--datebefore', metavar='DATE', dest='datebefore', default=None, - help='download only videos uploaded on or before this date (i.e. inclusive)') + help='Download only videos uploaded on or before this date (i.e. inclusive)') selection.add_option( '--dateafter', metavar='DATE', dest='dateafter', default=None, - help='download only videos uploaded on or after this date (i.e. inclusive)') + help='Download only videos uploaded on or after this date (i.e. inclusive)') selection.add_option( '--min-views', metavar='COUNT', dest='min_views', default=None, type=int, - help='Do not download any videos with less than COUNT views',) + help='Do not download any videos with less than COUNT views') selection.add_option( '--max-views', metavar='COUNT', dest='max_views', default=None, type=int, @@ -259,7 +259,7 @@ def parseOpts(overrideArguments=None): '--match-filter', metavar='FILTER', dest='match_filter', default=None, help=( - '(Experimental) Generic video filter. ' + 'Generic video filter (experimental). ' 'Specify any key (see help for -o for a list of available keys) to' ' match if the key is present, ' '!key to check if the key is not present,' @@ -277,15 +277,15 @@ def parseOpts(overrideArguments=None): selection.add_option( '--no-playlist', action='store_true', dest='noplaylist', default=False, - help='If the URL refers to a video and a playlist, download only the video.') + help='Download only the video, if the URL refers to a video and a playlist.') selection.add_option( '--yes-playlist', action='store_false', dest='noplaylist', default=False, - help='If the URL refers to a video and a playlist, download the playlist.') + help='Download the playlist, if the URL refers to a video and a playlist.') selection.add_option( '--age-limit', metavar='YEARS', dest='age_limit', default=None, type=int, - help='download only videos suitable for the given age') + help='Download only videos suitable for the given age') selection.add_option( '--download-archive', metavar='FILE', dest='download_archive', @@ -299,30 +299,30 @@ def parseOpts(overrideArguments=None): authentication.add_option( '-u', '--username', dest='username', metavar='USERNAME', - help='login with this account ID') + help='Login with this account ID') authentication.add_option( '-p', '--password', dest='password', metavar='PASSWORD', - help='account password. If this option is left out, youtube-dl will ask interactively.') + help='Account password. If this option is left out, youtube-dl will ask interactively.') authentication.add_option( '-2', '--twofactor', dest='twofactor', metavar='TWOFACTOR', - help='two-factor auth code') + help='Two-factor auth code') authentication.add_option( '-n', '--netrc', action='store_true', dest='usenetrc', default=False, - help='use .netrc authentication data') + help='Use .netrc authentication data') authentication.add_option( '--video-password', dest='videopassword', metavar='PASSWORD', - help='video password (vimeo, smotri)') + help='Video password (vimeo, smotri)') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( '-f', '--format', action='store', dest='format', metavar='FORMAT', default=None, help=( - 'video format code, specify the order of preference using' + 'Video format code, specify the order of preference using' ' slashes, as in -f 22/17/18 . ' ' Instead of format codes, you can select by extension for the ' 'extensions aac, m4a, mp3, mp4, ogg, wav, webm. ' @@ -350,19 +350,19 @@ def parseOpts(overrideArguments=None): video_format.add_option( '--all-formats', action='store_const', dest='format', const='all', - help='download all available video formats') + help='Download all available video formats') video_format.add_option( '--prefer-free-formats', action='store_true', dest='prefer_free_formats', default=False, - help='prefer free video formats unless a specific one is requested') + help='Prefer free video formats unless a specific one is requested') video_format.add_option( '--max-quality', action='store', dest='format_limit', metavar='FORMAT', - help='highest quality format to download') + help='Specify highest quality format to download') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', - help='list all available formats') + help='List all available formats') video_format.add_option( '--youtube-include-dash-manifest', action='store_true', dest='youtube_include_dash_manifest', default=True, @@ -382,46 +382,46 @@ def parseOpts(overrideArguments=None): subtitles.add_option( '--write-sub', '--write-srt', action='store_true', dest='writesubtitles', default=False, - help='write subtitle file') + help='Write subtitle file') subtitles.add_option( '--write-auto-sub', '--write-automatic-sub', action='store_true', dest='writeautomaticsub', default=False, - help='write automatic subtitle file (youtube only)') + help='Write automatic subtitle file (YouTube only)') subtitles.add_option( '--all-subs', action='store_true', dest='allsubtitles', default=False, - help='downloads all the available subtitles of the video') + help='Download all the available subtitles of the video') subtitles.add_option( '--list-subs', action='store_true', dest='listsubtitles', default=False, - help='lists all available subtitles for the video') + help='List all available subtitles for the video') subtitles.add_option( '--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', default='best', - help='subtitle format, accepts formats preference, for example: "ass/srt/best"') + help='Specify subtitle format preference, for example: "srt" or "ass/srt/best"') subtitles.add_option( '--sub-lang', '--sub-langs', '--srt-lang', action='callback', dest='subtitleslangs', metavar='LANGS', type='str', default=[], callback=_comma_separated_values_options_callback, - help='languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') + help='Languages of the subtitles to download (optional) separated by commas, use IETF language tags like \'en,pt\'') downloader = optparse.OptionGroup(parser, 'Download Options') downloader.add_option( '-r', '--rate-limit', dest='ratelimit', metavar='LIMIT', - help='maximum download rate in bytes per second (e.g. 50K or 4.2M)') + help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)') downloader.add_option( '-R', '--retries', dest='retries', metavar='RETRIES', default=10, - help='number of retries (default is %default), or "infinite".') + help='Number of retries (default is %default), or "infinite".') downloader.add_option( '--buffer-size', dest='buffersize', metavar='SIZE', default='1024', - help='size of download buffer (e.g. 1024 or 16K) (default is %default)') + help='Size of download buffer (e.g. 1024 or 16K) (default is %default)') downloader.add_option( '--no-resize-buffer', action='store_true', dest='noresizebuffer', default=False, - help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.') + help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.') downloader.add_option( '--test', action='store_true', dest='test', default=False, @@ -433,11 +433,11 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--xattr-set-filesize', dest='xattr_set_filesize', action='store_true', - help='(experimental) set file xattribute ytdl.filesize with expected filesize') + help='Set file xattribute ytdl.filesize with expected filesize (experimental)') downloader.add_option( '--hls-prefer-native', dest='hls_prefer_native', action='store_true', - help='(experimental) Use the native HLS downloader instead of ffmpeg.') + help='Use the native HLS downloader instead of ffmpeg (experimental)') downloader.add_option( '--external-downloader', dest='external_downloader', metavar='COMMAND', @@ -446,7 +446,7 @@ def parseOpts(overrideArguments=None): downloader.add_option( '--external-downloader-args', dest='external_downloader_args', metavar='ARGS', - help='Give these arguments to the external downloader.') + help='Give these arguments to the external downloader') workarounds = optparse.OptionGroup(parser, 'Workarounds') workarounds.add_option( @@ -456,7 +456,7 @@ def parseOpts(overrideArguments=None): workarounds.add_option( '--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, - help='Suppress HTTPS certificate validation.') + help='Suppress HTTPS certificate validation') workarounds.add_option( '--prefer-insecure', '--prefer-unsecure', action='store_true', dest='prefer_insecure', @@ -464,16 +464,16 @@ def parseOpts(overrideArguments=None): workarounds.add_option( '--user-agent', metavar='UA', dest='user_agent', - help='specify a custom user agent') + help='Specify a custom user agent') workarounds.add_option( '--referer', metavar='URL', dest='referer', default=None, - help='specify a custom referer, use if the video access is restricted to one domain', + help='Specify a custom referer, use if the video access is restricted to one domain', ) workarounds.add_option( '--add-header', metavar='FIELD:VALUE', dest='headers', action='append', - help='specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', + help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times', ) workarounds.add_option( '--bidi-workaround', @@ -488,7 +488,7 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-q', '--quiet', action='store_true', dest='quiet', default=False, - help='activates quiet mode') + help='Activate quiet mode') verbosity.add_option( '--no-warnings', dest='no_warnings', action='store_true', default=False, @@ -496,51 +496,51 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-s', '--simulate', action='store_true', dest='simulate', default=False, - help='do not download the video and do not write anything to disk',) + help='Do not download the video and do not write anything to disk') verbosity.add_option( '--skip-download', action='store_true', dest='skip_download', default=False, - help='do not download the video',) + help='Do not download the video') verbosity.add_option( '-g', '--get-url', action='store_true', dest='geturl', default=False, - help='simulate, quiet but print URL') + help='Simulate, quiet but print URL') verbosity.add_option( '-e', '--get-title', action='store_true', dest='gettitle', default=False, - help='simulate, quiet but print title') + help='Simulate, quiet but print title') verbosity.add_option( '--get-id', action='store_true', dest='getid', default=False, - help='simulate, quiet but print id') + help='Simulate, quiet but print id') verbosity.add_option( '--get-thumbnail', action='store_true', dest='getthumbnail', default=False, - help='simulate, quiet but print thumbnail URL') + help='Simulate, quiet but print thumbnail URL') verbosity.add_option( '--get-description', action='store_true', dest='getdescription', default=False, - help='simulate, quiet but print video description') + help='Simulate, quiet but print video description') verbosity.add_option( '--get-duration', action='store_true', dest='getduration', default=False, - help='simulate, quiet but print video length') + help='Simulate, quiet but print video length') verbosity.add_option( '--get-filename', action='store_true', dest='getfilename', default=False, - help='simulate, quiet but print output filename') + help='Simulate, quiet but print output filename') verbosity.add_option( '--get-format', action='store_true', dest='getformat', default=False, - help='simulate, quiet but print output format') + help='Simulate, quiet but print output format') verbosity.add_option( '-j', '--dump-json', action='store_true', dest='dumpjson', default=False, - help='simulate, quiet but print JSON information. See --output for a description of available keys.') + help='Simulate, quiet but print JSON information. See --output for a description of available keys.') verbosity.add_option( '-J', '--dump-single-json', action='store_true', dest='dump_single_json', default=False, - help='simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.') + help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.') verbosity.add_option( '--print-json', action='store_true', dest='print_json', default=False, @@ -549,23 +549,23 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '--newline', action='store_true', dest='progress_with_newline', default=False, - help='output progress bar as new lines') + help='Output progress bar as new lines') verbosity.add_option( '--no-progress', action='store_true', dest='noprogress', default=False, - help='do not print progress bar') + help='Do not print progress bar') verbosity.add_option( '--console-title', action='store_true', dest='consoletitle', default=False, - help='display progress in console titlebar') + help='Display progress in console titlebar') verbosity.add_option( '-v', '--verbose', action='store_true', dest='verbose', default=False, - help='print various debugging information') + help='Print various debugging information') verbosity.add_option( '--dump-pages', '--dump-intermediate-pages', action='store_true', dest='dump_intermediate_pages', default=False, - help='print downloaded pages to debug problems (very verbose)') + help='Print downloaded pages to debug problems (very verbose)') verbosity.add_option( '--write-pages', action='store_true', dest='write_pages', default=False, @@ -581,31 +581,31 @@ def parseOpts(overrideArguments=None): verbosity.add_option( '-C', '--call-home', dest='call_home', action='store_true', default=False, - help='Contact the youtube-dl server for debugging.') + help='Contact the youtube-dl server for debugging') verbosity.add_option( '--no-call-home', dest='call_home', action='store_false', default=False, - help='Do NOT contact the youtube-dl server for debugging.') + help='Do NOT contact the youtube-dl server for debugging') filesystem = optparse.OptionGroup(parser, 'Filesystem Options') filesystem.add_option( '-a', '--batch-file', dest='batchfile', metavar='FILE', - help='file containing URLs to download (\'-\' for stdin)') + help='File containing URLs to download (\'-\' for stdin)') filesystem.add_option( '--id', default=False, - action='store_true', dest='useid', help='use only video ID in file name') + action='store_true', dest='useid', help='Use only video ID in file name') filesystem.add_option( '-o', '--output', dest='outtmpl', metavar='TEMPLATE', - help=('output filename template. Use %(title)s to get the title, ' + help=('Output filename template. Use %(title)s to get the title, ' '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, ' '%(autonumber)s to get an automatically incremented number, ' '%(ext)s for the filename extension, ' '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' - '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' + '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), ' '%(upload_date)s for the upload date (YYYYMMDD), ' - '%(extractor)s for the provider (youtube, metacafe, etc), ' + '%(extractor)s for the provider (YouTube, metacafe, etc), ' '%(id)s for the video id, ' '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, ' '%(playlist_index)s for the position in the playlist. ' @@ -617,7 +617,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--autonumber-size', dest='autonumber_size', metavar='NUMBER', - help='Specifies the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') + help='Specify the number of digits in %(autonumber)s when it is present in output filename template or --auto-number option is given') filesystem.add_option( '--restrict-filenames', action='store_true', dest='restrictfilenames', default=False, @@ -625,55 +625,55 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-A', '--auto-number', action='store_true', dest='autonumber', default=False, - help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] number downloaded files starting from 00000') + help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number of downloaded files starting from 00000') filesystem.add_option( '-t', '--title', action='store_true', dest='usetitle', default=False, - help='[deprecated] use title in file name (default)') + help='[deprecated] Use title in file name (default)') filesystem.add_option( '-l', '--literal', default=False, action='store_true', dest='usetitle', - help='[deprecated] alias of --title') + help='[deprecated] Alias of --title') filesystem.add_option( '-w', '--no-overwrites', action='store_true', dest='nooverwrites', default=False, - help='do not overwrite files') + help='Do not overwrite files') filesystem.add_option( '-c', '--continue', action='store_true', dest='continue_dl', default=True, - help='force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.') + help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.') filesystem.add_option( '--no-continue', action='store_false', dest='continue_dl', - help='do not resume partially downloaded files (restart from beginning)') + help='Do not resume partially downloaded files (restart from beginning)') filesystem.add_option( '--no-part', action='store_true', dest='nopart', default=False, - help='do not use .part files - write directly into output file') + help='Do not use .part files - write directly into output file') filesystem.add_option( '--no-mtime', action='store_false', dest='updatetime', default=True, - help='do not use the Last-modified header to set the file modification time') + help='Do not use the Last-modified header to set the file modification time') filesystem.add_option( '--write-description', action='store_true', dest='writedescription', default=False, - help='write video description to a .description file') + help='Write video description to a .description file') filesystem.add_option( '--write-info-json', action='store_true', dest='writeinfojson', default=False, - help='write video metadata to a .info.json file') + help='Write video metadata to a .info.json file') filesystem.add_option( '--write-annotations', action='store_true', dest='writeannotations', default=False, - help='write video annotations to a .annotation file') + help='Write video annotations to a .annotation file') filesystem.add_option( '--load-info', dest='load_info_filename', metavar='FILE', - help='json file containing the video information (created with the "--write-json" option)') + help='Specify JSON file containing the video information (created with the "--write-json" option)') filesystem.add_option( '--cookies', dest='cookiefile', metavar='FILE', - help='file to read cookies from and dump cookie jar in') + help='File to read cookies from and dump cookie jar in') filesystem.add_option( '--cache-dir', dest='cachedir', default=None, metavar='DIR', help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.') @@ -689,11 +689,11 @@ def parseOpts(overrideArguments=None): thumbnail.add_option( '--write-thumbnail', action='store_true', dest='writethumbnail', default=False, - help='write thumbnail image to disk') + help='Write thumbnail image to disk') thumbnail.add_option( '--write-all-thumbnails', action='store_true', dest='write_all_thumbnails', default=False, - help='write all thumbnail image formats to disk') + help='Write all thumbnail image formats to disk') thumbnail.add_option( '--list-thumbnails', action='store_true', dest='list_thumbnails', default=False, @@ -703,14 +703,14 @@ def parseOpts(overrideArguments=None): postproc.add_option( '-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') + help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option( '--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') + help='Specify audio format: "best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; "%default" by default') postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') + help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)') postproc.add_option( '--recode-video', metavar='FORMAT', dest='recodevideo', default=None, @@ -718,27 +718,27 @@ def parseOpts(overrideArguments=None): postproc.add_option( '-k', '--keep-video', action='store_true', dest='keepvideo', default=False, - help='keeps the video file on disk after the post-processing; the video is erased by default') + help='Keep the video file on disk after the post-processing; the video is erased by default') postproc.add_option( '--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, - help='do not overwrite post-processed files; the post-processed files are overwritten by default') + help='Do not overwrite post-processed files; the post-processed files are overwritten by default') postproc.add_option( '--embed-subs', action='store_true', dest='embedsubtitles', default=False, - help='embed subtitles in the video (only for mp4 videos)') + help='Embed subtitles in the video (only for mp4 videos)') postproc.add_option( '--embed-thumbnail', action='store_true', dest='embedthumbnail', default=False, - help='embed thumbnail in the audio as cover art') + help='Embed thumbnail in the audio as cover art') postproc.add_option( '--add-metadata', action='store_true', dest='addmetadata', default=False, - help='write metadata to the video file') + help='Write metadata to the video file') postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', - help='parse additional metadata like song title / artist from the video title. ' + help='Parse additional metadata like song title / artist from the video title. ' 'The format syntax is the same as --output, ' 'the parsed parameters replace existing values. ' 'Additional templates: %(album), %(artist). ' @@ -747,7 +747,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--xattrs', action='store_true', dest='xattrs', default=False, - help='write metadata to the video file\'s xattrs (using dublin core and xdg standards)') + help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)') postproc.add_option( '--fixup', metavar='POLICY', dest='fixup', default='detect_or_warn', From 4bbeb19fc77a49af763ce3443293b29b8450d686 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 3 Apr 2015 14:09:07 +0200 Subject: [PATCH 03/95] [miomio] pep8: remove whitespaces in empty line --- youtube_dl/extractor/miomio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/miomio.py b/youtube_dl/extractor/miomio.py index 11608f730..cc3f27194 100644 --- a/youtube_dl/extractor/miomio.py +++ b/youtube_dl/extractor/miomio.py @@ -44,7 +44,7 @@ class MioMioIE(InfoExtractor): xml_config = self._search_regex( r'flashvars="type=sina&(.+?)&', webpage, 'xml config') - + # skipping the following page causes lags and eventually connection drop-outs self._request_webpage( 'http://www.miomio.tv/mioplayer/mioplayerconfigfiles/xml.php?id=%s&r=%s' % (id, random.randint(100, 999)), From 3da4b31359c8dbbad3477fbe8341e6fc293cda82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 3 Apr 2015 14:09:50 +0200 Subject: [PATCH 04/95] [postprocessor/ffmpeg] Fix crash when ffprobe/avprobe are not installed (closes #5349) 'self.probe_basename' was None, so 'probe_executable' raised a KeyError exception --- youtube_dl/postprocessor/ffmpeg.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 55adf9685..0b60ac7e7 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -116,6 +116,10 @@ class FFmpegPostProcessor(PostProcessor): def executable(self): return self._paths[self.basename] + @property + def probe_available(self): + return self.probe_basename is not None + @property def probe_executable(self): return self._paths[self.probe_basename] @@ -168,7 +172,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): def get_audio_codec(self, path): - if not self.probe_executable: + if not self.probe_available: raise PostProcessingError('ffprobe or avprobe not found. Please install one.') try: cmd = [ From ff2be6e180f1af471dd6d533719d9c595c756557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 3 Apr 2015 15:01:17 +0200 Subject: [PATCH 05/95] [bloomberg] Adapt to website changes (fixes #5347) --- youtube_dl/extractor/bloomberg.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 4a88ccd13..0dca29b71 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -6,32 +6,39 @@ from .common import InfoExtractor class BloombergIE(InfoExtractor): - _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?)\.html' + _VALID_URL = r'https?://www\.bloomberg\.com/news/videos/[^/]+/(?P[^/?#]+)' _TEST = { - 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', # The md5 checksum changes 'info_dict': { 'id': 'qurhIVlJSB6hzkVi229d8g', 'ext': 'flv', 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', - 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88', + 'description': 'md5:a8ba0302912d03d246979735c17d2761', }, } def _real_extract(self, url): name = self._match_id(url) webpage = self._download_webpage(url, name) - - f4m_url = self._search_regex( - r' Date: Fri, 3 Apr 2015 15:34:49 +0200 Subject: [PATCH 06/95] [Gamersyde] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gamersyde.py | 64 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 youtube_dl/extractor/gamersyde.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index aae4aae4c..2935d5b33 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -177,6 +177,7 @@ from .gameone import ( GameOneIE, GameOnePlaylistIE, ) +from .gamersyde import GamersydeIE from .gamespot import GameSpotIE from .gamestar import GameStarIE from .gametrailers import GametrailersIE diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py new file mode 100644 index 000000000..c40106216 --- /dev/null +++ b/youtube_dl/extractor/gamersyde.py @@ -0,0 +1,64 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re +import json +import time +from .common import InfoExtractor + + +class GamersydeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' + _TEST = { + 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', + 'md5': 'f38d400d32f19724570040d5ce3a505f', + 'info_dict': { + 'id': '34371', + 'ext': 'mp4', + 'title': 'Bloodborne - Birth of a hero', + 'thumbnail': 're:^https?://.*\.jpg$', + } + } + + def _calculateDuration(self, durationString): + duration = time.strptime(durationString, "%M minutes %S seconds") + return duration.tm_min * 60 + duration.tm_sec + + def _fixJsonSyntax(self, json): + + json = re.sub(r"{\s*(\w)", r'{"\1', json) + json = re.sub(r",\s*(\w)", r',"\1', json) + json = re.sub(r"(\w): ", r'\1":', json) + json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) + json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) + + return json + + def _real_extract(self, url): + + video_id = self._search_regex(r'-(.*?)_[a-z]{2}.html$', url, 'video_id') + webpage = self._download_webpage(url, video_id) + + filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) + filesJson = self._fixJsonSyntax(filesJson) + + data = json.loads(filesJson) + playlist = data[0] + + formats = [] + + title = re.sub(r"[0-9]+ - ", "", playlist['title']) + + for playlistEntry in playlist['sources']: + format = { + 'url': playlistEntry['file'], + 'format_id': playlistEntry['label'] + } + + formats.append(format) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': playlist['image'] + } From 185a7e25e7c18b0dff17bdb9ae828616a5ac17d4 Mon Sep 17 00:00:00 2001 From: Mohammad Teimori Pabandi Date: Fri, 3 Apr 2015 20:55:39 +0430 Subject: [PATCH 07/95] [RadioJavan] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/radiojavan.py | 71 ++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 youtube_dl/extractor/radiojavan.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3011b784d..df4a7419a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -391,6 +391,7 @@ from .pyvideo import PyvideoIE from .quickvid import QuickVidIE from .r7 import R7IE from .radiode import RadioDeIE +from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE from .rai import RaiIE diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py new file mode 100644 index 000000000..de90f9270 --- /dev/null +++ b/youtube_dl/extractor/radiojavan.py @@ -0,0 +1,71 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import( + parse_duration, + str_to_int +) + +class RadioJavanIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P[^/]+)/?' + _TEST = { + 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', + 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', + 'info_dict': { + 'id': 'chaartaar-ashoobam', + 'ext': 'mp4', + 'title': 'Chaartaar - Ashoobam', + 'description': 'Chaartaar - Ashoobam', + 'thumbnail': 're:^https?://.*\.jpe?g$', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + urls = list() + prefix = 'https://media.rdjavan.com/media/music_video/' + + video_url_480 = self._search_regex( + r'RJ\.video480p = \'([^\']+)\'', webpage, '480 video url', fatal= False) + video_url_720 = self._search_regex( + r'RJ\.video720p = \'([^\']+)\'', webpage, '720 video url', fatal= False) + video_url_1080 = self._search_regex( + r'RJ\.video1080p = \'([^\']+)\'', webpage, '1080 video url', fatal= False) + + if video_url_480: + urls.append({'url': prefix + video_url_480, 'format': '480p'}) + if video_url_720: + urls.append({'url': prefix + video_url_720, 'format': '720p'}) + if video_url_1080: + urls.append({'url': prefix + video_url_1080, 'format': '1080p'}) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + formats = [{ + 'url': url['url'], + 'format': url['format'] + } for url in urls] + + likes = self._search_regex( + r'([\d,]+)\s*likes', webpage, 'Likes Count', fatal=False ) + likes = likes.replace(',', '') + dislikes = self._search_regex( + r'([\d,]+)\s*dislikes', webpage, 'Dislikes Count', fatal=False ) + dislikes = dislikes.replace(',', '') + + plays = self._search_regex( + r'views_publish[">\s]*]+class="views">Plays: ([\d,]+)', webpage, 'Play Count', fatal=False ) + plays = plays.replace(',', '') + + return { + 'formats': formats, + 'id': display_id, + 'title': title, + 'description': title, # no description provided in RadioJavan + 'thumbnail': thumbnail, + 'like_count': str_to_int(likes), + 'dislike_count': str_to_int(dislikes), + 'viewCount': str_to_int(plays) + } \ No newline at end of file From cd341b6e0679c11b3698191615b18dbaaf2b0a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 3 Apr 2015 19:37:35 +0200 Subject: [PATCH 08/95] [mixcloud] Fix extraction of like count (reported in #5231) --- youtube_dl/extractor/mixcloud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 21aea0c55..84f291558 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -97,7 +97,7 @@ class MixcloudIE(InfoExtractor): r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False) description = self._og_search_description(webpage) like_count = str_to_int(self._search_regex( - r'\bbutton-favorite\b.+m-ajax-toggle-count="([^"]+)"', + r'\bbutton-favorite\b[^>]+m-ajax-toggle-count="([^"]+)"', webpage, 'like count', fatal=False)) view_count = str_to_int(self._search_regex( [r' Date: Fri, 3 Apr 2015 23:42:53 +0600 Subject: [PATCH 09/95] [prosiebensat1] Fix bitrate (Closes #5350 closes #5351) --- youtube_dl/extractor/prosiebensat1.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 385681d06..c46aaada6 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -10,6 +10,7 @@ from ..compat import ( ) from ..utils import ( unified_strdate, + int_or_none, ) @@ -266,6 +267,9 @@ class ProSiebenSat1IE(InfoExtractor): urls_sources = urls_sources.values() def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate for source in urls_sources: From 16fa01291bd94703e2258a68bef1491d57f0dabc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 3 Apr 2015 23:44:13 +0600 Subject: [PATCH 10/95] [prosiebensat1] Fix test --- youtube_dl/extractor/prosiebensat1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index c46aaada6..7cc799664 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -25,7 +25,7 @@ class ProSiebenSat1IE(InfoExtractor): 'info_dict': { 'id': '2104602', 'ext': 'mp4', - 'title': 'Staffel 2, Episode 18 - Jahresrückblick', + 'title': 'Episode 18 - Staffel 2', 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', 'upload_date': '20131231', 'duration': 5845.04, From ff556f5c09ec8700bb012a58a5e39505b887b774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 00:30:37 +0600 Subject: [PATCH 11/95] Do not encode outtmpl twice (Closes #5288) --- youtube_dl/__init__.py | 4 ---- youtube_dl/options.py | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 852b2fc3d..1c8b411b7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -189,10 +189,6 @@ def _real_main(argv=None): if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True - if sys.version_info < (3,): - # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) - if opts.outtmpl is not None: - opts.outtmpl = opts.outtmpl.decode(preferredencoding()) outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 35c7e5fb3..8e80e3759 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -13,6 +13,7 @@ from .compat import ( compat_kwargs, ) from .utils import ( + preferredencoding, write_string, ) from .version import __version__ @@ -797,7 +798,7 @@ def parseOpts(overrideArguments=None): # Workaround for Python 2.x, where argv is a byte list if sys.version_info < (3,): command_line_conf = [ - a.decode('utf-8', 'replace') for a in command_line_conf] + a.decode(preferredencoding(), 'replace') for a in command_line_conf] if '--ignore-config' in command_line_conf: system_conf = [] From 115c281672bd7479f87c48249f6a0186ac7d19cc Mon Sep 17 00:00:00 2001 From: snipem Date: Sat, 4 Apr 2015 12:31:48 +0200 Subject: [PATCH 12/95] [Gamersyde] Improved robustness, added duration and tests Fix for Json syntax is now less error prone for Json syntax inside of values. Extractor is now also using native Json handling. Added tests for several videos that were producing errors in the first place. --- youtube_dl/extractor/gamersyde.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index c40106216..5c68a6891 100644 --- a/youtube_dl/extractor/gamersyde.py +++ b/youtube_dl/extractor/gamersyde.py @@ -8,7 +8,6 @@ from .common import InfoExtractor class GamersydeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' - _TEST = { 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { @@ -17,6 +16,11 @@ class GamersydeIE(InfoExtractor): 'title': 'Bloodborne - Birth of a hero', 'thumbnail': 're:^https?://.*\.jpg$', } + }, + { + 'url': 'http://www.gamersyde.com/hqstream_dark_souls_ii_scholar_of_the_first_sin_gameplay_part_1-34417_en.html', + 'info_dict': { + 'ext': 'mp4', } def _calculateDuration(self, durationString): @@ -27,7 +31,6 @@ class GamersydeIE(InfoExtractor): json = re.sub(r"{\s*(\w)", r'{"\1', json) json = re.sub(r",\s*(\w)", r',"\1', json) - json = re.sub(r"(\w): ", r'\1":', json) json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) @@ -40,7 +43,6 @@ class GamersydeIE(InfoExtractor): filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) filesJson = self._fixJsonSyntax(filesJson) - data = json.loads(filesJson) playlist = data[0] From 3d24d997ae1f92686aa7edd0bfeed28353fbfb2e Mon Sep 17 00:00:00 2001 From: snipem Date: Sat, 4 Apr 2015 12:42:14 +0200 Subject: [PATCH 13/95] Fixed intendation of test cases Leaded to error on Linux machine --- youtube_dl/extractor/gamersyde.py | 45 ++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index 5c68a6891..cc6fa4037 100644 --- a/youtube_dl/extractor/gamersyde.py +++ b/youtube_dl/extractor/gamersyde.py @@ -1,39 +1,62 @@ # coding: utf-8 from __future__ import unicode_literals import re -import json import time + from .common import InfoExtractor class GamersydeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' + _TESTS = [{ 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { 'id': '34371', 'ext': 'mp4', + 'duration': 372, 'title': 'Bloodborne - Birth of a hero', 'thumbnail': 're:^https?://.*\.jpg$', } - }, - { + }, { 'url': 'http://www.gamersyde.com/hqstream_dark_souls_ii_scholar_of_the_first_sin_gameplay_part_1-34417_en.html', + 'md5': '94bd7c3feff3275576cf5cb6c8a3a720', 'info_dict': { + 'id': '34417', 'ext': 'mp4', + 'duration': 270, + 'title': 'Dark Souls II: Scholar of the First Sin - Gameplay - Part 1', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': 'http://www.gamersyde.com/hqstream_grand_theft_auto_v_heists_trailer-33786_en.html', + 'md5': '65e442f5f340d571ece8c80d50700369', + 'info_dict': { + 'id': '33786', + 'ext': 'mp4', + 'duration': 59, + 'title': 'Grand Theft Auto V - Heists Trailer', + 'thumbnail': 're:^https?://.*\.jpg$', + } } + ] def _calculateDuration(self, durationString): - duration = time.strptime(durationString, "%M minutes %S seconds") + if (durationString.find("minutes") > -1): + duration = time.strptime(durationString, "%M minutes %S seconds") + else: + duration = time.strptime(durationString, "%S seconds") return duration.tm_min * 60 + duration.tm_sec def _fixJsonSyntax(self, json): - json = re.sub(r"{\s*(\w)", r'{"\1', json) - json = re.sub(r",\s*(\w)", r',"\1', json) json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) - + json = json.replace('file: "', '"file": "') + json = json.replace('title: "', '"title": "') + json = json.replace('label: "', '"label": "') + json = json.replace('image: "', '"image": "') + json = json.replace('sources: [', '"sources": [') return json def _real_extract(self, url): @@ -42,13 +65,16 @@ class GamersydeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) - filesJson = self._fixJsonSyntax(filesJson) - data = json.loads(filesJson) + data = self._parse_json(filesJson,video_id, transform_source=self._fixJsonSyntax) + playlist = data[0] formats = [] title = re.sub(r"[0-9]+ - ", "", playlist['title']) + + length = self._search_regex(r'(([0-9]{1,2} minutes ){0,1}[0-9]{1,2} seconds)', webpage, 'length') + duration = self._calculateDuration(length) for playlistEntry in playlist['sources']: format = { @@ -62,5 +88,6 @@ class GamersydeIE(InfoExtractor): 'id': video_id, 'title': title, 'formats': formats, + 'duration': duration, 'thumbnail': playlist['image'] } From 7cf97daf77f6419f2b965a199a3fb1e63b8771b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 16:45:41 +0600 Subject: [PATCH 14/95] [radiojavan] Simplify and extract upload date --- youtube_dl/extractor/radiojavan.py | 75 ++++++++++++++---------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index de90f9270..73ab78d6d 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -1,12 +1,14 @@ -# coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import( - parse_duration, - str_to_int + unified_strdate, + str_to_int, ) + class RadioJavanIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P[^/]+)/?' _TEST = { @@ -16,56 +18,49 @@ class RadioJavanIE(InfoExtractor): 'id': 'chaartaar-ashoobam', 'ext': 'mp4', 'title': 'Chaartaar - Ashoobam', - 'description': 'Chaartaar - Ashoobam', 'thumbnail': 're:^https?://.*\.jpe?g$', + 'upload_date': '20150215', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, } } def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - urls = list() - prefix = 'https://media.rdjavan.com/media/music_video/' + video_id = self._match_id(url) - video_url_480 = self._search_regex( - r'RJ\.video480p = \'([^\']+)\'', webpage, '480 video url', fatal= False) - video_url_720 = self._search_regex( - r'RJ\.video720p = \'([^\']+)\'', webpage, '720 video url', fatal= False) - video_url_1080 = self._search_regex( - r'RJ\.video1080p = \'([^\']+)\'', webpage, '1080 video url', fatal= False) + webpage = self._download_webpage(url, video_id) - if video_url_480: - urls.append({'url': prefix + video_url_480, 'format': '480p'}) - if video_url_720: - urls.append({'url': prefix + video_url_720, 'format': '720p'}) - if video_url_1080: - urls.append({'url': prefix + video_url_1080, 'format': '1080p'}) + formats = [{ + 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path, + 'format_id': '%sp' % height, + 'height': height, + } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) - formats = [{ - 'url': url['url'], - 'format': url['format'] - } for url in urls] - likes = self._search_regex( - r'([\d,]+)\s*likes', webpage, 'Likes Count', fatal=False ) - likes = likes.replace(',', '') - dislikes = self._search_regex( - r'([\d,]+)\s*dislikes', webpage, 'Dislikes Count', fatal=False ) - dislikes = dislikes.replace(',', '') + upload_date = unified_strdate(self._search_regex( + r'class="date_added">Date added: ([^<]+)<', + webpage, 'upload date', fatal=False)) - plays = self._search_regex( - r'views_publish[">\s]*]+class="views">Plays: ([\d,]+)', webpage, 'Play Count', fatal=False ) - plays = plays.replace(',', '') + view_count = str_to_int(self._search_regex( + r'class="views">Plays: ([\d,]+)', + webpage, 'view count', fatal=False)) + like_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) likes', + webpage, 'like count', fatal=False)) + dislike_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) dislikes', + webpage, 'dislike count', fatal=False)) return { - 'formats': formats, - 'id': display_id, + 'id': video_id, 'title': title, - 'description': title, # no description provided in RadioJavan 'thumbnail': thumbnail, - 'like_count': str_to_int(likes), - 'dislike_count': str_to_int(dislikes), - 'viewCount': str_to_int(plays) - } \ No newline at end of file + 'upload_date': upload_date, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'formats': formats, + } From 6e617ed0b6b5bb932f928f63c2bda36f5317468d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 16:47:09 +0600 Subject: [PATCH 15/95] Credit @mtp1376 for varzesh3 and radiojavan --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 48769320a..cf238176b 100644 --- a/AUTHORS +++ b/AUTHORS @@ -120,3 +120,4 @@ Jeff Buchbinder Amish Bhadeshia Joram Schrijver Will W. +Mohammad Teimori Pabandi From e9f65f87496d740fbb61e036c710bf2c174f1cc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 4 Apr 2015 13:11:55 +0200 Subject: [PATCH 16/95] [rtve] Extract a better quality video --- youtube_dl/extractor/rtve.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 13f071077..8d9be1b98 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -10,6 +10,7 @@ from ..compat import compat_urlparse from ..utils import ( float_or_none, remove_end, + std_headers, struct_unpack, ) @@ -84,13 +85,20 @@ class RTVEALaCartaIE(InfoExtractor): 'only_matching': True, }] + def _real_initialize(self): + user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8') + manager_info = self._download_json( + 'http://www.rtve.es/odin/loki/' + user_agent_b64, + None, 'Fetching manager info') + self._manager = manager_info['manager'] + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') info = self._download_json( 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, video_id)['page']['items'][0] - png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % video_id + png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) png = self._download_webpage(png_url, video_id, 'Downloading url information') video_url = _decrypt_url(png) if not video_url.endswith('.f4m'): From ba9e68f40261355ceae5bb87c5707adc7f7beb2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 17:48:55 +0600 Subject: [PATCH 17/95] [utils] Drop trailing comma before closing brace --- test/test_utils.py | 6 ++++++ youtube_dl/utils.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index abaf1ab73..4e524aca3 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -470,6 +470,12 @@ class TestUtil(unittest.TestCase): self.assertEqual(d['x'], 1) self.assertEqual(d['y'], 'a') + on = js_to_json('["abc", "def",]') + self.assertEqual(json.loads(on), ['abc', 'def']) + + on = js_to_json('{"abc": "def",}') + self.assertEqual(json.loads(on), {'abc': 'def'}) + def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 90e0ed9ab..e1761265c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1577,7 +1577,7 @@ def js_to_json(code): '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| [a-zA-Z_][.a-zA-Z_0-9]* ''', fix_kv, code) - res = re.sub(r',(\s*\])', lambda m: m.group(1), res) + res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res) return res From 5c29dbd0c76083eaf596f623fabb612575f71861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 17:53:22 +0600 Subject: [PATCH 18/95] [gamersyde] Simplify --- youtube_dl/extractor/gamersyde.py | 103 ++++++++++++------------------ 1 file changed, 40 insertions(+), 63 deletions(-) diff --git a/youtube_dl/extractor/gamersyde.py b/youtube_dl/extractor/gamersyde.py index cc6fa4037..d545e01bb 100644 --- a/youtube_dl/extractor/gamersyde.py +++ b/youtube_dl/extractor/gamersyde.py @@ -1,14 +1,18 @@ -# coding: utf-8 from __future__ import unicode_literals + import re -import time from .common import InfoExtractor +from ..utils import ( + js_to_json, + parse_duration, + remove_start, +) class GamersydeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_' - _TESTS = [{ + _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P[\da-z_]+)-(?P\d+)_[a-z]{2}\.html' + _TEST = { 'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html', 'md5': 'f38d400d32f19724570040d5ce3a505f', 'info_dict': { @@ -18,76 +22,49 @@ class GamersydeIE(InfoExtractor): 'title': 'Bloodborne - Birth of a hero', 'thumbnail': 're:^https?://.*\.jpg$', } - }, { - 'url': 'http://www.gamersyde.com/hqstream_dark_souls_ii_scholar_of_the_first_sin_gameplay_part_1-34417_en.html', - 'md5': '94bd7c3feff3275576cf5cb6c8a3a720', - 'info_dict': { - 'id': '34417', - 'ext': 'mp4', - 'duration': 270, - 'title': 'Dark Souls II: Scholar of the First Sin - Gameplay - Part 1', - 'thumbnail': 're:^https?://.*\.jpg$', - } - }, { - 'url': 'http://www.gamersyde.com/hqstream_grand_theft_auto_v_heists_trailer-33786_en.html', - 'md5': '65e442f5f340d571ece8c80d50700369', - 'info_dict': { - 'id': '33786', - 'ext': 'mp4', - 'duration': 59, - 'title': 'Grand Theft Auto V - Heists Trailer', - 'thumbnail': 're:^https?://.*\.jpg$', - } } - ] - - def _calculateDuration(self, durationString): - if (durationString.find("minutes") > -1): - duration = time.strptime(durationString, "%M minutes %S seconds") - else: - duration = time.strptime(durationString, "%S seconds") - return duration.tm_min * 60 + duration.tm_sec - - def _fixJsonSyntax(self, json): - - json = re.sub(r",\s*}", "}", json, flags=re.DOTALL) - json = re.sub(r",\s*]", "]", json, flags=re.DOTALL) - json = json.replace('file: "', '"file": "') - json = json.replace('title: "', '"title": "') - json = json.replace('label: "', '"label": "') - json = json.replace('image: "', '"image": "') - json = json.replace('sources: [', '"sources": [') - return json def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - video_id = self._search_regex(r'-(.*?)_[a-z]{2}.html$', url, 'video_id') - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) - filesJson = self._search_regex(r'playlist: (.*?)\}\);', webpage, 'files', flags=re.DOTALL) - data = self._parse_json(filesJson,video_id, transform_source=self._fixJsonSyntax) - - playlist = data[0] + playlist = self._parse_json( + self._search_regex( + r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'), + display_id, transform_source=js_to_json) formats = [] - - title = re.sub(r"[0-9]+ - ", "", playlist['title']) - - length = self._search_regex(r'(([0-9]{1,2} minutes ){0,1}[0-9]{1,2} seconds)', webpage, 'length') - duration = self._calculateDuration(length) - - for playlistEntry in playlist['sources']: - format = { - 'url': playlistEntry['file'], - 'format_id': playlistEntry['label'] + for source in playlist['sources']: + video_url = source.get('file') + if not video_url: + continue + format_id = source.get('label') + f = { + 'url': video_url, + 'format_id': format_id, } + m = re.search(r'^(?P\d+)[pP](?P\d+)fps', format_id) + if m: + f.update({ + 'height': int(m.group('height')), + 'fps': int(m.group('fps')), + }) + formats.append(f) + self._sort_formats(formats) - formats.append(format) + title = remove_start(playlist['title'], '%s - ' % video_id) + thumbnail = playlist.get('image') + duration = parse_duration(self._search_regex( + r'Length:([^<]+)<', webpage, 'duration', fatal=False)) return { 'id': video_id, + 'display_id': display_id, 'title': title, - 'formats': formats, + 'thumbnail': thumbnail, 'duration': duration, - 'thumbnail': playlist['image'] - } + 'formats': formats, + } From 79c21abba7c9902f00ddac83a2af29c36fe0e122 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 18:45:46 +0600 Subject: [PATCH 19/95] [utils] Add one more template to unified_strdate --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e1761265c..be3f62da7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -759,6 +759,7 @@ def unified_strdate(date_str, day_first=True): ] if day_first: format_expressions.extend([ + '%d-%m-%Y', '%d.%m.%Y', '%d/%m/%Y', '%d/%m/%y', @@ -766,6 +767,7 @@ def unified_strdate(date_str, day_first=True): ]) else: format_expressions.extend([ + '%m-%d-%Y', '%m.%d.%Y', '%m/%d/%Y', '%m/%d/%y', From 15ac8413c78b991f2e99b6bdc538bc8c5ae8e8a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 19:08:48 +0600 Subject: [PATCH 20/95] [utils] Avoid treating `*-%Y` date template as UTC offset --- youtube_dl/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index be3f62da7..52f0dd09a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -730,7 +730,8 @@ def unified_strdate(date_str, day_first=True): # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 - date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) + if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str): + date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) # Remove AM/PM + timezone date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) From 8cf70de428c3fef910ba966fb56d39478226acc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 19:11:01 +0600 Subject: [PATCH 21/95] [test_utils] Add test for unified_strdate --- test/test_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_utils.py b/test/test_utils.py index 4e524aca3..2e3a6480c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -227,6 +227,7 @@ class TestUtil(unittest.TestCase): self.assertEqual( unified_strdate('2/2/2015 6:47:40 PM', day_first=False), '20150202') + self.assertEqual(unified_strdate('25-09-2014'), '20140925') def test_find_xpath_attr(self): testxml = ''' From 7c39a65543b809b681434246b84710349f5837aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 19:13:37 +0600 Subject: [PATCH 22/95] [pornovoisines] Simplify --- youtube_dl/extractor/pornovoisines.py | 111 ++++++++++++-------------- 1 file changed, 53 insertions(+), 58 deletions(-) diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py index efbb6a818..9688ed948 100644 --- a/youtube_dl/extractor/pornovoisines.py +++ b/youtube_dl/extractor/pornovoisines.py @@ -2,19 +2,23 @@ from __future__ import unicode_literals import re -import datetime import random -from ..compat import compat_urllib_parse from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, + unified_strdate, +) + class PornoVoisinesIE(InfoExtractor): - _VALID_URL = r'^((?:http://)?(?:www\.)?pornovoisines.com)/showvideo/(\d+)/([^/]+)' + _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P\d+)/(?P[^/]+)' - VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ + _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' - SERVER_NUMBERS = (1, 2) + _SERVER_NUMBERS = (1, 2) _TEST = { 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', @@ -23,79 +27,70 @@ class PornoVoisinesIE(InfoExtractor): 'id': '1285', 'display_id': 'recherche-appartement', 'ext': 'mp4', - 'title': "Recherche appartement", + 'title': 'Recherche appartement', + 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', + 'thumbnail': 're:^https?://.*\.jpg$', 'upload_date': '20140925', - 'view_count': int, 'duration': 120, - 'categories': ["Débutante", "Scénario", "Sodomie"], - 'description': 're:^Pour la .+ original...$', - 'thumbnail': 're:^http://', - 'uploader': "JMTV", + 'view_count': int, 'average_rating': float, - 'comment_count': int, + 'categories': ['Débutante', 'Scénario', 'Sodomie'], 'age_limit': 18, } } @classmethod - def build_video_url(cls, id): - server_nr = random.choice(cls.SERVER_NUMBERS) - return cls.VIDEO_URL_TEMPLATE % (server_nr, id) - - @staticmethod - def parse_upload_date(str): - return datetime.datetime.strptime(str, "%d-%m-%Y").strftime("%Y%m%d") - - @staticmethod - def parse_categories(str): - return map(lambda s: s.strip(), str.split(',')) + def build_video_url(cls, num): + return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - url_prefix = mobj.group(1) - id = mobj.group(2) - display_id = mobj.group(3) + video_id = mobj.group('id') + display_id = mobj.group('display_id') - webpage = self._download_webpage(url, id) + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'

    (.+?)

    ', webpage, 'title', - flags=re.DOTALL) - url = self.build_video_url(id) - upload_date = self.parse_upload_date( - self._search_regex(r'Publié le (\d\d-\d\d-\d{4})', webpage, - 'upload date')) - view_count = int(self._search_regex(r'(\d+) vues', webpage, 'view count')) - duration = int(self._search_regex('Durée (\d+)', webpage, 'duration')) - categories = self.parse_categories(self._html_search_regex( - r'
  • (.+?)
  • ', webpage, "categories", - flags=re.DOTALL)) + video_url = self.build_video_url(video_id) + + title = self._html_search_regex( + r'

    (.+?)

    ', webpage, 'title', flags=re.DOTALL) description = self._html_search_regex( - r'
    (.+?)
    ', webpage, "description", - flags=re.DOTALL) - thumbnail = url_prefix + self._html_search_regex(re.compile( - '
    .*?(.+?)', webpage, - "uploader", flags=re.DOTALL)) - average_rating = float(self._search_regex(r'Note : (\d+,\d+)', - webpage, "average rating").replace(',', '.')) - comment_count = int(self._search_regex(r'\((\d+)\)', webpage, - "comment count")) + r'
    (.+?)
    ', + webpage, "description", fatal=False, flags=re.DOTALL) + + thumbnail = self._search_regex( + r'
    \s* Date: Sat, 4 Apr 2015 19:16:18 +0600 Subject: [PATCH 23/95] Credit @Roman2K for pornovoisines (#5264) --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index cf238176b..9c65dc1d4 100644 --- a/AUTHORS +++ b/AUTHORS @@ -121,3 +121,4 @@ Amish Bhadeshia Joram Schrijver Will W. Mohammad Teimori Pabandi +Roman Le Négrate From ff02a228e35ab11c9cfa6e0d000b7fd6de52a0c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 19:21:50 +0600 Subject: [PATCH 24/95] [test_execution] Fix test under python 2 @ windows --- test/test_execution.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/test_execution.py b/test/test_execution.py index f31e51558..620db080e 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -8,6 +8,9 @@ import unittest import sys import os import subprocess +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import encodeArgument rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -31,7 +34,7 @@ class TestExecution(unittest.TestCase): def test_cmdline_umlauts(self): p = subprocess.Popen( - [sys.executable, 'youtube_dl/__main__.py', 'ä', '--version'], + [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) From 4e8cc1e973da2656c46c5df84d4e85c5d78836ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 19:24:37 +0600 Subject: [PATCH 25/95] [radiojavan] Fix height --- youtube_dl/extractor/radiojavan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index 73ab78d6d..c9eda9b53 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -34,7 +34,7 @@ class RadioJavanIE(InfoExtractor): formats = [{ 'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path, 'format_id': '%sp' % height, - 'height': height, + 'height': int(height), } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] title = self._og_search_title(webpage) From 8fb2e5a4f5b9604f93964f9b6ae7062830e3bab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 19:25:08 +0600 Subject: [PATCH 26/95] [radiojavan] Sort formats --- youtube_dl/extractor/radiojavan.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py index c9eda9b53..884c28420 100644 --- a/youtube_dl/extractor/radiojavan.py +++ b/youtube_dl/extractor/radiojavan.py @@ -36,6 +36,7 @@ class RadioJavanIE(InfoExtractor): 'format_id': '%sp' % height, 'height': int(height), } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)] + self._sort_formats(formats) title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) From 4a3cdf81af9c22c45912b0b4c5845531d52d3a0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 20:00:23 +0600 Subject: [PATCH 27/95] [options] Restore some strings --- youtube_dl/options.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 9bded4521..d861ac458 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -359,7 +359,7 @@ def parseOpts(overrideArguments=None): video_format.add_option( '--max-quality', action='store', dest='format_limit', metavar='FORMAT', - help='Specify highest quality format to download') + help='Highest quality format to download') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats', @@ -399,7 +399,7 @@ def parseOpts(overrideArguments=None): subtitles.add_option( '--sub-format', action='store', dest='subtitlesformat', metavar='FORMAT', default='best', - help='Specify subtitle format preference, for example: "srt" or "ass/srt/best"') + help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"') subtitles.add_option( '--sub-lang', '--sub-langs', '--srt-lang', action='callback', dest='subtitleslangs', metavar='LANGS', type='str', From f01855813b364dbd3e0c7fecacda84410d2780bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 20:01:24 +0600 Subject: [PATCH 28/95] [options] extractor is lowercase --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index d861ac458..ed2216d40 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -606,7 +606,7 @@ def parseOpts(overrideArguments=None): '%(format)s for the format description (like "22 - 1280x720" or "HD"), ' '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), ' '%(upload_date)s for the upload date (YYYYMMDD), ' - '%(extractor)s for the provider (YouTube, metacafe, etc), ' + '%(extractor)s for the provider (youtube, metacafe, etc), ' '%(id)s for the video id, ' '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, ' '%(playlist_index)s for the position in the playlist. ' From 6b70a4eb7d4bcbe6812f78876b4aa9aa44a58fef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 20:02:29 +0600 Subject: [PATCH 29/95] [options] `Number` is a verb here --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index ed2216d40..2097a9436 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -626,7 +626,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '-A', '--auto-number', action='store_true', dest='autonumber', default=False, - help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number of downloaded files starting from 00000') + help='[deprecated; use -o "%(autonumber)s-%(title)s.%(ext)s" ] Number downloaded files starting from 00000') filesystem.add_option( '-t', '--title', action='store_true', dest='usetitle', default=False, From 1a48181a9ff872e4b8428603f70851c386a2790d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 20:09:11 +0600 Subject: [PATCH 30/95] [options] Fix load info help string --- youtube_dl/options.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 2097a9436..5720fb424 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -670,7 +670,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--load-info', dest='load_info_filename', metavar='FILE', - help='Specify JSON file containing the video information (created with the "--write-json" option)') + help='JSON file containing the video information (created with the "--write-info-json" option)') filesystem.add_option( '--cookies', dest='cookiefile', metavar='FILE', From 1a68d39211cab61994c8717cce296b0baae8095a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 22:15:59 +0600 Subject: [PATCH 31/95] [aftonbladet] Fix extraction --- youtube_dl/extractor/aftonbladet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index 8442019ea..4675585ca 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class AftonbladetIE(InfoExtractor): - _VALID_URL = r'^http://tv\.aftonbladet\.se/webbtv.+?(?Particle[0-9]+)\.ab(?:$|[?#])' + _VALID_URL = r'http://tv\.aftonbladet\.se/webbtv.+?(?Particle[0-9]+)\.ab(?:$|[?#])' _TEST = { 'url': 'http://tv.aftonbladet.se/webbtv/nyheter/vetenskap/rymden/article36015.ab', 'info_dict': { From 8e1f93747338d64f6855c0f7f9467714bf56db93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 22:19:34 +0600 Subject: [PATCH 32/95] [aftonbladet] Modernize --- youtube_dl/extractor/aftonbladet.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/aftonbladet.py b/youtube_dl/extractor/aftonbladet.py index 4675585ca..a117502bc 100644 --- a/youtube_dl/extractor/aftonbladet.py +++ b/youtube_dl/extractor/aftonbladet.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import int_or_none class AftonbladetIE(InfoExtractor): @@ -43,9 +44,9 @@ class AftonbladetIE(InfoExtractor): formats.append({ 'url': 'http://%s:%d/%s/%s' % (p['address'], p['port'], p['path'], p['filename']), 'ext': 'mp4', - 'width': fmt['width'], - 'height': fmt['height'], - 'tbr': fmt['bitrate'], + 'width': int_or_none(fmt.get('width')), + 'height': int_or_none(fmt.get('height')), + 'tbr': int_or_none(fmt.get('bitrate')), 'protocol': 'http', }) self._sort_formats(formats) @@ -54,9 +55,9 @@ class AftonbladetIE(InfoExtractor): 'id': video_id, 'title': internal_meta_json['title'], 'formats': formats, - 'thumbnail': internal_meta_json['imageUrl'], - 'description': internal_meta_json['shortPreamble'], - 'timestamp': internal_meta_json['timePublished'], - 'duration': internal_meta_json['duration'], - 'view_count': internal_meta_json['views'], + 'thumbnail': internal_meta_json.get('imageUrl'), + 'description': internal_meta_json.get('shortPreamble'), + 'timestamp': int_or_none(internal_meta_json.get('timePublished')), + 'duration': int_or_none(internal_meta_json.get('duration')), + 'view_count': int_or_none(internal_meta_json.get('views')), } From ed676e8c0ab087acb8e5e26a2a8d94a47fe10c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 22:27:25 +0600 Subject: [PATCH 33/95] [bliptv] Check format URLs Some formats are now 404 --- youtube_dl/extractor/bliptv.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index 8c7ba4b91..b632ce967 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -172,6 +172,7 @@ class BlipTVIE(InfoExtractor): 'width': int_or_none(media_content.get('width')), 'height': int_or_none(media_content.get('height')), }) + self._check_formats(formats, video_id) self._sort_formats(formats) subtitles = self.extract_subtitles(video_id, subtitles_urls) From 184a1974414bc91c5804251a33b8dd5cba1f75d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 22:43:34 +0600 Subject: [PATCH 34/95] [culturebox] Check for unavailable videos --- youtube_dl/extractor/francetv.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 170d68075..20acc96bd 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -275,7 +275,12 @@ class CultureboxIE(FranceTVBaseInfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') + webpage = self._download_webpage(url, name) + + if ">Ce live n'est plus disponible en replay<" in webpage: + raise ExtractorError('Video %s is not available' % name, expected=True) + video_id, catalogue = self._search_regex( r'"http://videos\.francetv\.fr/video/([^@]+@[^"]+)"', webpage, 'video id').split('@') From aed2d4b31e331422fefa304ab3fa49c050ea13e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 22:50:13 +0600 Subject: [PATCH 35/95] [culturebox] Replace test --- youtube_dl/extractor/francetv.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 20acc96bd..55b8e9d9e 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -260,15 +260,18 @@ class CultureboxIE(FranceTVBaseInfoExtractor): _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P.*?)(\?|$)' _TEST = { - 'url': 'http://culturebox.francetvinfo.fr/festivals/dans-les-jardins-de-william-christie/dans-les-jardins-de-william-christie-le-camus-162553', - 'md5': '5ad6dec1ffb2a3fbcb20cc4b744be8d6', + 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511', 'info_dict': { - 'id': 'EV_22853', - 'ext': 'flv', - 'title': 'Dans les jardins de William Christie - Le Camus', - 'description': 'md5:4710c82315c40f0c865ca8b9a68b5299', - 'upload_date': '20140829', - 'timestamp': 1409317200, + 'id': 'EV_50111', + 'ext': 'mp4', + 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne", + 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9', + 'upload_date': '20150320', + 'timestamp': 1426892400, + 'duration': 2760.9, + }, + 'params': { + 'skip_download': True, }, } From f05d0e73c6d38d86393f8f552fbfcdda80f37607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 22:52:25 +0600 Subject: [PATCH 36/95] [francetv] Fix duration --- youtube_dl/extractor/francetv.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 55b8e9d9e..0d92ef9c4 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -14,6 +14,7 @@ from ..utils import ( clean_html, ExtractorError, int_or_none, + float_or_none, parse_duration, ) @@ -86,7 +87,7 @@ class FranceTVBaseInfoExtractor(InfoExtractor): 'title': info['titre'], 'description': clean_html(info['synopsis']), 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']), - 'duration': parse_duration(info['duree']), + 'duration': float_or_none(info.get('real_duration'), 1000) or parse_duration(info['duree']), 'timestamp': int_or_none(info['diffusion']['timestamp']), 'formats': formats, } From bc03228ab52672666b79c9fadfbf886f8d8bf5d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:02:04 +0600 Subject: [PATCH 37/95] [francetv] Improve formats extraction --- youtube_dl/extractor/francetv.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index 0d92ef9c4..fd3e7aa7b 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -16,6 +16,7 @@ from ..utils import ( int_or_none, float_or_none, parse_duration, + determine_ext, ) @@ -51,7 +52,8 @@ class FranceTVBaseInfoExtractor(InfoExtractor): if not video_url: continue format_id = video['format'] - if video_url.endswith('.f4m'): + ext = determine_ext(video_url) + if ext == 'f4m': if georestricted: # See https://github.com/rg3/youtube-dl/issues/3963 # m3u8 urls work fine @@ -61,12 +63,9 @@ class FranceTVBaseInfoExtractor(InfoExtractor): 'http://hdfauth.francetv.fr/esi/urltokengen2.html?url=%s' % video_url_parsed.path, video_id, 'Downloading f4m manifest token', fatal=False) if f4m_url: - f4m_formats = self._extract_f4m_formats(f4m_url, video_id) - for f4m_format in f4m_formats: - f4m_format['preference'] = 1 - formats.extend(f4m_formats) - elif video_url.endswith('.m3u8'): - formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4')) + formats.extend(self._extract_f4m_formats(f4m_url, video_id, 1, format_id)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats(video_url, video_id, 'mp4', m3u8_id=format_id)) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, From e21a55abcc502abac559027551751ff84d215077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:05:25 +0600 Subject: [PATCH 38/95] [extractor/common] Remove f4m section It's now provided by `f4m_id` --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e5245ec3f..530c449c1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -822,7 +822,7 @@ class InfoExtractor(object): (media_el.attrib.get('href') or media_el.attrib.get('url'))) tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ - 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), + 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), 'url': manifest_url, 'ext': 'flv', 'tbr': tbr, From ac651e974ef8da74df35e5ac5464cc03e35bb2d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:06:16 +0600 Subject: [PATCH 39/95] [culturebox] Fix test --- youtube_dl/extractor/francetv.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index fd3e7aa7b..edf555b29 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -261,18 +261,16 @@ class CultureboxIE(FranceTVBaseInfoExtractor): _TEST = { 'url': 'http://culturebox.francetvinfo.fr/live/musique/musique-classique/le-livre-vermeil-de-montserrat-a-la-cathedrale-delne-214511', + 'md5': '9b88dc156781c4dbebd4c3e066e0b1d6', 'info_dict': { 'id': 'EV_50111', - 'ext': 'mp4', + 'ext': 'flv', 'title': "Le Livre Vermeil de Montserrat à la Cathédrale d'Elne", 'description': 'md5:f8a4ad202e8fe533e2c493cc12e739d9', 'upload_date': '20150320', 'timestamp': 1426892400, 'duration': 2760.9, }, - 'params': { - 'skip_download': True, - }, } def _real_extract(self, url): From aff84bec07fc1919591827543845790074b0194f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:17:09 +0600 Subject: [PATCH 40/95] [drtv] Check for unavailable videos --- youtube_dl/extractor/drtv.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 8257e35a4..bd1109549 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -26,6 +26,10 @@ class DRTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + if '>Programmet er ikke længere tilgængeligt' in webpage: + raise ExtractorError( + 'Video %s is not available' % video_id, expected=True) + video_id = self._search_regex( r'data-(?:material-identifier|episode-slug)="([^"]+)"', webpage, 'video id') From 7d2546397209deab14a0ebad6c933ed97e73fe41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:19:28 +0600 Subject: [PATCH 41/95] [drtv] Update test --- youtube_dl/extractor/drtv.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index bd1109549..f25ab319e 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor, ExtractorError @@ -8,16 +9,16 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P[\da-z-]+)(?:[/#?]|$)' _TEST = { - 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', - 'md5': '4a7e1dd65cdb2643500a3f753c942f25', + 'url': 'https://www.dr.dk/tv/se/boern/ultra/panisk-paske/panisk-paske-5', + 'md5': 'dc515a9ab50577fa14cc4e4b0265168f', 'info_dict': { - 'id': 'partiets-mand-7-8', + 'id': 'panisk-paske-5', 'ext': 'mp4', - 'title': 'Partiets mand (7:8)', - 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862', - 'timestamp': 1403047940, - 'upload_date': '20140617', - 'duration': 1299.040, + 'title': 'Panisk Påske (5)', + 'description': 'md5:ca14173c5ab24cd26b0fcc074dff391c', + 'timestamp': 1426984612, + 'upload_date': '20150322', + 'duration': 1455, }, } From 218d6bcc05bd84d8f69a7b764702dc24acb2f761 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:28:47 +0600 Subject: [PATCH 42/95] [dreisat] Capture status errors --- youtube_dl/extractor/dreisat.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 69ca75423..b88460a23 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -3,7 +3,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + ExtractorError, + unified_strdate, +) class DreiSatIE(InfoExtractor): @@ -28,6 +31,15 @@ class DreiSatIE(InfoExtractor): details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id details_doc = self._download_xml(details_url, video_id, 'Downloading video details') + status_code = details_doc.find('./status/statuscode') + if status_code is not None and status_code.text != 'ok': + code = status_code.text + if code == 'notVisibleAnymore': + message = 'Video %s is not available' % video_id + else: + message = '%s returned error: %s' % (self.IE_NAME, code) + raise ExtractorError(message, expected=True) + thumbnail_els = details_doc.findall('.//teaserimage') thumbnails = [{ 'width': int(te.attrib['key'].partition('x')[0]), From a319c33d8b8c4a7ac1d2f8dd739508b041d960b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:30:38 +0600 Subject: [PATCH 43/95] [dreisat] Update test --- youtube_dl/extractor/dreisat.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index b88460a23..05bb22ddf 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -13,15 +13,15 @@ class DreiSatIE(InfoExtractor): IE_NAME = '3sat' _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P[0-9]+)$' _TEST = { - 'url': 'http://www.3sat.de/mediathek/index.php?obj=36983', - 'md5': '9dcfe344732808dbfcc901537973c922', + 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918', + 'md5': 'be37228896d30a88f315b638900a026e', 'info_dict': { - 'id': '36983', + 'id': '45918', 'ext': 'mp4', - 'title': 'Kaffeeland Schweiz', - 'description': 'md5:cc4424b18b75ae9948b13929a0814033', + 'title': 'Waidmannsheil', + 'description': 'md5:cce00ca1d70e21425e72c86a98a56817', 'uploader': '3sat', - 'upload_date': '20130622' + 'upload_date': '20140913' } } From fefc9d121d32321d3609e131e488c443d7af962a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:33:07 +0600 Subject: [PATCH 44/95] [dump] Fix title extraction --- youtube_dl/extractor/dump.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dump.py b/youtube_dl/extractor/dump.py index 6b651778a..ff78d4fd2 100644 --- a/youtube_dl/extractor/dump.py +++ b/youtube_dl/extractor/dump.py @@ -28,12 +28,12 @@ class DumpIE(InfoExtractor): video_url = self._search_regex( r's1.addVariable\("file",\s*"([^"]+)"', webpage, 'video URL') - thumb = self._og_search_thumbnail(webpage) - title = self._search_regex(r'([^"]+)', webpage, 'title') + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) return { 'id': video_id, 'title': title, 'url': video_url, - 'thumbnail': thumb, + 'thumbnail': thumbnail, } From f67dcc09f5e2f68032e379133e53b07e58c544dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 4 Apr 2015 23:36:45 +0600 Subject: [PATCH 45/95] [eagleplatform] Skip georestricted test --- youtube_dl/extractor/eagleplatform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dl/extractor/eagleplatform.py index 7173371ee..688dfc2f7 100644 --- a/youtube_dl/extractor/eagleplatform.py +++ b/youtube_dl/extractor/eagleplatform.py @@ -45,6 +45,7 @@ class EaglePlatformIE(InfoExtractor): 'duration': 216, 'view_count': int, }, + 'skip': 'Georestricted', }] def _handle_error(self, response): From 27fe5e347350484009e79251ec7ef97484219481 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 5 Apr 2015 00:00:04 +0600 Subject: [PATCH 46/95] [ellentv] Make video url extraction fatal --- youtube_dl/extractor/ellentv.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ellentv.py b/youtube_dl/extractor/ellentv.py index fc92ff825..3a7962144 100644 --- a/youtube_dl/extractor/ellentv.py +++ b/youtube_dl/extractor/ellentv.py @@ -40,14 +40,15 @@ class EllenTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = self._html_search_meta('VideoURL', webpage, 'url') + + video_url = self._html_search_meta('VideoURL', webpage, 'url', fatal=True) title = self._og_search_title(webpage, default=None) or self._search_regex( r'pageName\s*=\s*"([^"]+)"', webpage, 'title') description = self._html_search_meta( 'description', webpage, 'description') or self._og_search_description(webpage) timestamp = parse_iso8601(self._search_regex( r'", webpage) + + formats = [] + for q in sorted(qualities): + formats.append({ + "format_id": q, + "format": q, + "ext": "mp4", + "url": "http://spankbang.com/_{}/{}/title/{}__mp4".format(video_id, stream_key, q) + }) + + return { + "id": video_id, + "title": title, + "description": self._og_search_description(webpage), + "formats": formats + } + +# vim: tabstop=4 expandtab From 2e7daef50220ee90e8a2e2b979600f8bd4a3e40e Mon Sep 17 00:00:00 2001 From: newtonelectron Date: Sun, 5 Apr 2015 13:43:21 -0700 Subject: [PATCH 52/95] [SpankBang] Use python2.6 compatible string formatting spec --- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 8e845ef26..61fd64d17 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -25,7 +25,7 @@ class SpankBangIE(InfoExtractor): "format_id": q, "format": q, "ext": "mp4", - "url": "http://spankbang.com/_{}/{}/title/{}__mp4".format(video_id, stream_key, q) + "url": "http://spankbang.com/_{0}/{1}/title/{2}__mp4".format(video_id, stream_key, q) }) return { From 5c1d459ae91d2681be88023e9056dcae3f48a70a Mon Sep 17 00:00:00 2001 From: newtonelectron Date: Sun, 5 Apr 2015 13:57:59 -0700 Subject: [PATCH 53/95] [SpankBang] Add test --- youtube_dl/extractor/spankbang.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 61fd64d17..2e20a5ad5 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -8,6 +8,20 @@ class SpankBangIE(InfoExtractor): """Extractor for http://spankbang.com""" _VALID_URL = r"https?://(?:www\.)?spankbang\.com/(?P\w+)/video/.*" + + _TEST = { + "url": "http://spankbang.com/3vvn/video/fantasy+solo", + "md5": "1cc433e1d6aa14bc376535b8679302f7", + "info_dict": { + "id": "3vvn", + "title": "fantasy solo", + "description": "Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.", + "format": "720p", + "format_id": "720p", + "ext": "mp4", + "url": "re:http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" + } + } def _real_extract(self, url): video_id = self._match_id(url) From c7ac5dce8c692f82f10363e40a7085ac53113bc8 Mon Sep 17 00:00:00 2001 From: newtonelectron Date: Sun, 5 Apr 2015 14:02:05 -0700 Subject: [PATCH 54/95] [SpankBang] Remove regexp type prefix from _TEST url. --- youtube_dl/extractor/spankbang.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py index 2e20a5ad5..d0b5ba278 100644 --- a/youtube_dl/extractor/spankbang.py +++ b/youtube_dl/extractor/spankbang.py @@ -19,7 +19,7 @@ class SpankBangIE(InfoExtractor): "format": "720p", "format_id": "720p", "ext": "mp4", - "url": "re:http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" + "url": "http://spankbang.com/_3vvn/IjE0MjgyNjY5MTcuMzUi.IaGrcF-vDrvktMhjd-1fWixiCzU/title/720p__mp4" } } From d2272fcf6e2796583b516f4786733577459dec43 Mon Sep 17 00:00:00 2001 From: felix Date: Mon, 6 Apr 2015 09:54:19 +0200 Subject: [PATCH 55/95] crooksandliars.com extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/crooksandliars.py | 71 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 youtube_dl/extractor/crooksandliars.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7eb9b4fbb..dc272af82 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -90,6 +90,7 @@ from .commonmistakes import CommonMistakesIE, UnicodeBOMIE from .condenast import CondeNastIE from .cracked import CrackedIE from .criterion import CriterionIE +from .crooksandliars import CrooksAndLiarsIE, CrooksAndLiarsArticleIE from .crunchyroll import ( CrunchyrollIE, CrunchyrollShowPlaylistIE diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dl/extractor/crooksandliars.py new file mode 100644 index 000000000..afccca354 --- /dev/null +++ b/youtube_dl/extractor/crooksandliars.py @@ -0,0 +1,71 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + mimetype2ext, +) + + +class CrooksAndLiarsIE(InfoExtractor): + _VALID_URL = r'(?:https?:)?//embed.crooksandliars.com/embed/(?P[A-Za-z0-9]+)(?:$|[?#])' + + _TESTS = [{ + 'url': 'https://embed.crooksandliars.com/embed/8RUoRhRi', + 'info_dict': { + 'id': 'https://embed.crooksandliars.com/embed/8RUoRhRi', + 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!", + 'description': "Fox News, Fox & Friends Weekend, April 4, 2015. Read more... http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists", + 'timestamp': 1428207000, + 'thumbnail': '//crooksandliars.com/files/mediaposters/2015/04/31235.jpg?ts=1428207050', + 'uploader': "Heather", + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + manifest = json.loads(self._html_search_regex(r'var manifest = ({.*?})\n', webpage, 'manifest JSON')) + + formats = [] + for item in manifest['flavors']: + if not item['mime'].startswith('video/'): # XXX: or item['exclude']? + continue + formats.append({ + 'format_id': item['type'], + 'ext': mimetype2ext(item['mime']), + 'url': item['url'], + }) + + # XXX: manifest['url']? + return { + 'url': url, + 'id': video_id, + 'uploader': manifest['author'], + 'title': manifest['title'], + 'description': manifest['description'], + 'thumbnail': manifest['poster'], + 'duration': manifest['duration'], + 'timestamp': int(manifest['created']), + 'formats': formats, + } + +class CrooksAndLiarsArticleIE(InfoExtractor): + _VALID_URL = r'(?:https?:)?//crooksandliars.com/\d+/\d+/(?P[a-z\-]+)(?:/|$)' + + _TESTS = [{ + 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_url = self._proto_relative_url(self._html_search_regex(r'