From 46fc798e15c82dff75e0a3d50ac05361e63ed2c5 Mon Sep 17 00:00:00 2001
From: Avi Peretz
Date: Thu, 20 Jun 2019 13:03:30 +0300
Subject: [PATCH 1/2] extract shares and likes.

---
Review notes (text between "---" and the diff is not part of the commit):
  * _extract_meta_count() walks the given field names in order; for each
    one it searches the webpage first and tahoe_data.secondary second, and
    returns the first captured string, or None when nothing matches.
  * _extract_likes() returns the last "likecount" match found in the
    webpage, falls back to the last match in tahoe_data.secondary, and
    returns None (implicitly) when neither source matches.
  * The view-count lookup now passes 'view count' as the field name (it
    was mislabelled 'likes'), and the stray trailing comma in the
    _extract_meta_count signature is gone.  Both are same-line edits, so
    the hunk headers and diffstat are unchanged; the "index" blob ids are
    kept from the original commit and no longer match the post-image.
  * Leading whitespace of context lines was reconstructed; if the patch
    does not apply cleanly, retry with "git apply --ignore-whitespace".

 youtube_dl/extractor/facebook.py | 47 ++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 11 deletions(-)

diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index cc233b651..3ba2e648f 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -460,18 +460,12 @@ class FacebookIE(InfoExtractor):
             'uploader_id', default=None) or self._search_regex(
             r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary,
             'uploader_id', fatal=False)
+
         thumbnail = self._og_search_thumbnail(webpage)
 
-        view_count = parse_count(self._search_regex(
-            r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-            default=None) or self._search_regex(
-            r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count',
-            default=None) or self._search_regex(
-            r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-            default=None) or self._search_regex(
-            r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count',
-            default=None)
-        )
+        view_count = parse_count(self._extract_meta_count(['postViewCount', 'viewCount'], webpage, tahoe_data, 'view count'))
+        likes_count = parse_count(self._extract_likes(webpage, tahoe_data))
+        shares_count = parse_count(self._extract_meta_count(['sharecount'], webpage, tahoe_data, 'shares'))
 
         info_dict = {
             'id': video_id,
@@ -483,11 +477,42 @@ class FacebookIE(InfoExtractor):
             'view_count': view_count,
             'uploader_id': uploader_id,
             'is_live': is_live,
-            'live_status': live_status
+            'live_status': live_status,
+            'likes': likes_count,
+            'shares': shares_count
         }
 
         return webpage, info_dict
 
+    def _extract_meta_count(self, fields, webpage, tahoe_data, name):
+        value = None
+
+        for f in fields:
+            if value:
+                break
+            value = self._search_regex(
+                r'\b%s\s*:\s*["\']([\d,.]+)' % f, webpage, name,
+                default=None
+            )
+            if value:
+                break
+
+            value = self._search_regex(
+                r'[\'\"]%s[\'\"]\s*:\s*(\d+)' % f, tahoe_data.secondary, name,
+                default=None)
+
+        return value
+
+    def _extract_likes(self, webpage, tahoe_data):
+        values = re.findall(r'\blikecount\s*:\s*["\']([\d,.]+)', webpage)
+        if values:
+            return values[-1]
+
+
+        values = re.findall(r'[\'\"]\blikecount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary)
+        if values:
+            return values[-1]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 

From a5456c043a1f1231ff98421252573b354e0c084f Mon Sep 17 00:00:00 2001
From: Avi Peretz
Date: Thu, 20 Jun 2019 13:14:42 +0300
Subject: [PATCH 2/2] rename likes/shares info keys to like_count/share_count

---
Review notes (text between "---" and the diff is not part of the commit):
  * The original subject line was just "."; it now states the rename that
    the diff below performs.
  * NOTE(review): youtube-dl's documented info-dict field for shares
    appears to be "repost_count" rather than "share_count" - confirm
    which key downstream consumers expect.

 youtube_dl/extractor/facebook.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index 3ba2e648f..1b91c9036 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -478,8 +478,8 @@ class FacebookIE(InfoExtractor):
             'uploader_id': uploader_id,
             'is_live': is_live,
             'live_status': live_status,
-            'likes': likes_count,
-            'shares': shares_count
+            'like_count': likes_count,
+            'share_count': shares_count
         }
 
         return webpage, info_dict