From ccdbe1e8faac13fa78950f3001d6cdbc02798c70 Mon Sep 17 00:00:00 2001 From: Rob van Bekkum Date: Sat, 27 Feb 2016 19:54:38 +0100 Subject: [PATCH] Removed redundant comments, needless escaping of slashes in regular expressions, using scale argument of int_or_none and direct use of the generic extractor for already supported embeds --- youtube_dl/extractor/lcp.py | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/lcp.py b/youtube_dl/extractor/lcp.py index be586e1f1..eb3f16698 100644 --- a/youtube_dl/extractor/lcp.py +++ b/youtube_dl/extractor/lcp.py @@ -8,7 +8,7 @@ from ..utils import ( class LcpIE(InfoExtractor): IE_NAME = 'LCP' - _VALID_URL = r'https?:\/\/(?:www\.)?lcp\.fr\/(?:[^\/]+/)*(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?lcp\.fr/(?:[^\/]+/)*(?P<id>[^/]+)' _TESTS = [{ 'url': 'http://www.lcp.fr/la-politique-en-video/schwartzenberg-prg-preconise-francois-hollande-de-participer-une-primaire', @@ -31,24 +31,18 @@ class LcpIE(InfoExtractor): }] def _real_extract(self, url): - """Extracts the information for a given url and returns it in a dictionary""" display_id = self._match_id(url) - - # Extract the web page webpage = self._download_webpage(url, display_id) - # Extract the required info of the media files + # Extract the required info of the media files gathered in a dictionary media_files_info = self.__extract_from_webpage(display_id, webpage) - # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URL + # Some web pages embed videos from other platforms like dailymotion, therefore we pass on these URLs if media_files_info is None: - return self.url_result(self.__extract_embed_url(webpage)) + return self.url_result(url, 'Generic') - # Extract the video formats from the media info video_formats = self.__get_video_formats(media_files_info) - # Extract the thumbnails from the media info video_thumbnails = self.__get_thumbnails(media_files_info) - # 
Return the dictionary with the information about the video to download return { 'id': media_files_info['EntryName'], 'title': self._og_search_title(webpage), @@ -59,22 +53,19 @@ class LcpIE(InfoExtractor): def __extract_from_webpage(self, display_id, webpage): """Extracts the media info JSON object for the video for the provided web page.""" embed_url = self.__extract_embed_url(webpage) - embed_regex = r'(?:[a-zA-Z0-9]+\.)?lcp\.fr\/embed\/(?P<clip_id>[A-za-z0-9]+)\/(?P<player_id>[A-za-z0-9]+)\/(?P<skin_name>[^\/]+)' + embed_regex = r'(?:[a-zA-Z0-9]+\.)?lcp\.fr/embed/(?P<clip_id>[A-za-z0-9]+)/(?P<player_id>[A-za-z0-9]+)/(?P<skin_name>[^\/]+)' - # Extract the identifying attributes from the embed url of the web page clip_id = self._search_regex(embed_regex, embed_url, 'clip id', group='clip_id', fatal=False) player_id = self._search_regex(embed_regex, embed_url, 'player id', group='player_id', fatal=False) skin_name = self._search_regex(embed_regex, embed_url, 'skin name', group='skin_name', fatal=False) - # Check whether the extraction of the clip id, player id or skin name + # Check whether the matches failed, which might be when dealing with other players (e.g., dailymotion stream) if (clip_id is None) or (player_id is None) or (skin_name is None): return None - # Extract the video url from the embedded player return self.__extract_from_player(display_id, clip_id, player_id, skin_name) def __extract_embed_url(self, webpage): - """Extracts the embedded player url for the video.""" return self._search_regex( r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage, 'embed url', group='url') @@ -86,19 +77,17 @@ class LcpIE(InfoExtractor): player_id) arkena_info = self._download_webpage(arkena_url, 'clip_info_' + clip_id) - # Extract the json containing information about the video files arkena_info_regex = r'\?\((?P<json>.*)\);' info_json = self._parse_json(self._search_regex(arkena_info_regex, arkena_info, 'json', group='json'), display_id) - # All videos are part of a playlist, a single video is in a playlist of size 1 + # All videos are part of a 
playlist, a single video is also put in a playlist media_files_info = info_json.get('Playlist') if media_files_info is not None: media_files_info = media_files_info[0] return media_files_info def __get_thumbnails(self, media_files_info): - """Retrieves the thumbnails contained in the media info""" thumbnails = [] media_thumbnail_info = media_files_info.get('MediaInfo', {}).get('Poster') if media_thumbnail_info is not None: @@ -110,7 +99,6 @@ class LcpIE(InfoExtractor): return thumbnails def __get_video_formats(self, media_files_info): - """Retrieves the video formats contained in the media file info""" formats = [] media_files = media_files_info.get('MediaFiles') @@ -121,14 +109,11 @@ class LcpIE(InfoExtractor): return formats def __get_mp4_video_formats(self, media_files_json): - """Retrieves all mp4 video formats contained in the media file info""" formats = [] mp4_files_json = media_files_json.get('Mp4') if mp4_files_json is not None: for video_info in mp4_files_json: - bitrate = int_or_none(video_info.get('Bitrate')) - if bitrate is not None: - bitrate /= 1000 # Set bitrate to KBit/s + bitrate = int_or_none(video_info.get('Bitrate'), scale=0.001) # Scale bitrate to KBit/s formats.append({ 'url': video_info.get('Url'), 'ext': 'mp4',