From 3f8ec99719cc91c9ec4fe3a6b32e4930e2c27c23 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sat, 22 Jun 2019 22:54:09 +0300 Subject: [PATCH 1/3] extrace page likes. --- youtube_dl/extractor/facebook.py | 50 ++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 1b91c9036..14cb54966 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -24,6 +24,7 @@ from ..utils import ( sanitized_Request, try_get, urlencode_postdata, + update_url_query ) @@ -481,6 +482,8 @@ class FacebookIE(InfoExtractor): 'like_count': likes_count, 'share_count': shares_count } + if uploader_id: + info_dict['uploader_like_count'] = FacebookAjax(self, webpage, uploader_id).page_likes return webpage, info_dict @@ -587,6 +590,53 @@ class FacebookTahoeData: return tahoe_request_data, tahoe_request_headers +class FacebookAjax: + HOVER_URL_TEMPLATE = 'https://www.facebook.com/ajax/hovercard/user.php?id=111&fb_dtsg_ag=x&endpoint=%2Fajax%2Fhovercard%2Fuser.php%3Fid%3D111&__a=1' + + def __init__(self, extractor, page, page_id): + self._page = page + self._page_id = page_id + self._extractor = extractor + self._hover_data = None + + def _get_hover_data(self): + if self._hover_data: + data = self._hover_data + else: + data = self._extractor._download_webpage( + self._get_request_url(self._page_id), self._page_id + ) + return '' if not data else data + + @property + def hover(self): + return self._get_hover_data() + + @property + def page_likes(self): + return parse_count( + self._extractor._search_regex(r'\/span>([\d,]+) likes', self.hover, 'uploader_likes', default=None) + ) + + def _get_request_url(self, page_id): + return update_url_query(self.HOVER_URL_TEMPLATE, + { + + 'id': page_id, + 'endpoint': '/ajax/hovercard/user.php?id=%s' % page_id, + '__a': 1, + '__pc': self._extractor._search_regex( + r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', self._page, + 'pkg cohort', default='PHASED:DEFAULT'), + '__rev': self._extractor._search_regex( + r'client_revision["\']\s*:\s*(\d+),', self._page, + 'client revision', default='3944515'), + 'fb_dtsg': self._extractor._search_regex( + r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"', + self._page, 'dtsg token', default=''), + }) + + class FacebookPluginsVideoIE(InfoExtractor): _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?Phttps.+)' From c8a0a6d791501c7d15534405e7a70c6e3110a792 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sat, 22 Jun 2019 22:59:11 +0300 Subject: [PATCH 2/3] not fatal for now. --- youtube_dl/extractor/facebook.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 14cb54966..1021345bf 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -614,10 +614,13 @@ class FacebookAjax: @property def page_likes(self): - return parse_count( - self._extractor._search_regex(r'\/span>([\d,]+) likes', self.hover, 'uploader_likes', default=None) - ) - + try: + return parse_count( + self._extractor._search_regex(r'\/span>([\d,]+) likes', self.hover, 'uploader_likes', default=None) + ) + except: + return None + def _get_request_url(self, page_id): return update_url_query(self.HOVER_URL_TEMPLATE, { From 92e5646145dbaef5d6523dfc4d2a44cea9c4d57d Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 23 Jun 2019 09:57:17 +0300 Subject: [PATCH 3/3] print exception. --- youtube_dl/extractor/facebook.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 1021345bf..0c072a5e0 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -618,9 +618,9 @@ class FacebookAjax: return parse_count( self._extractor._search_regex(r'\/span>([\d,]+) likes', self.hover, 'uploader_likes', default=None) ) - except: - return None - + except Exception as e: + self._extractor.report_warning(self._page_id + str(e)) + def _get_request_url(self, page_id): return update_url_query(self.HOVER_URL_TEMPLATE, {