From 846b0921306bf57095a777236590cd63f8e6c888 Mon Sep 17 00:00:00 2001 From: hodayabu <44240078+hodayabu@users.noreply.github.com> Date: Mon, 25 May 2020 16:17:52 +0300 Subject: [PATCH] Facebook timestamp fix (#320) * missing metadata fix * timestamp fix * timestamp conditions fix * timestamp conditions fix * timestamp conditions fix Co-authored-by: bhodaya --- youtube_dl/extractor/facebook.py | 34 ++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index ed05dfa7b..f1aa02913 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -454,16 +454,30 @@ class FacebookIE(InfoExtractor): self._search_regex(r'ownerName"\s*:\s*"([^"]+)"', webpage, 'uploader', default=None) or \ self._og_search_title(webpage, default=None) - if webpage.find('Paid Partnership'): - timestamp = self._search_regex( - r'datePublished":"(.+?)"', webpage, - 'timestamp', default=None) - timestamp = parse_iso8601(timestamp) - else: - timestamp = int_or_none( - self._search_regex(r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary,'timestamp', default=None) - or self._search_regex(r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None) - ) or int_or_none(self._search_regex(r'publish_time":([\d]+)', webpage, 'timestamp', default=None)) + timestamp = self._search_regex( + r'datePublished":"(.+?)"', webpage,'timestamp', default=None)\ + or self._search_regex(r'datePublished":"(.+?)"', tahoe_data.secondary, 'timestamp', default=None)\ + or self._search_regex(r'datePublished":"(.+?)"', tahoe_data.primary, 'timestamp', default=None) + timestamp = parse_iso8601(timestamp) + + if timestamp == None and webpage.find('Paid Partnership') == -1 or\ + (timestamp == None and webpage.find('Paid Partnership') > -1 and + 'cookiefile' in self._downloader.params): + + regex_search_result_date_time = self._search_regex(r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary, 
'timestamp', default=None)\ + or self._search_regex(r'data-utime=\\\"(\d+)\\\"', tahoe_data.primary, 'timestamp', default=None)\ + or self._search_regex(r'data-utime=\\\"(\d+)\\\"', webpage,'timestamp', default=None)\ + or self._search_regex(r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)\ + or self._search_regex(r']+data-utime=["\'](\d+)', tahoe_data.secondary, 'timestamp', default=None)\ + or self._search_regex(r']+data-utime=["\'](\d+)', tahoe_data.primary, 'timestamp', default=None) + + regex_search_result_publish_time = self._search_regex(r'publish_time":([\d]+)', webpage, 'timestamp', default=None)\ + or self._search_regex(r'publish_time":([\d]+)', tahoe_data.primary, 'timestamp', default=None)\ + or self._search_regex(r'publish_time":([\d]+)', tahoe_data.secondary, 'timestamp', default=None) + + timestamp = int_or_none(regex_search_result_date_time) or int_or_none(regex_search_result_publish_time) + + uploader_id = self._search_regex(