From d6ae092fc9439d15065630c69ad8d70fea6477b7 Mon Sep 17 00:00:00 2001 From: hodayabu <44240078+hodayabu@users.noreply.github.com> Date: Sun, 24 May 2020 19:00:32 +0300 Subject: [PATCH] missing metadata fix (#319) * missing metadata fix * timestamp fix Co-authored-by: bhodaya --- youtube_dl/extractor/facebook.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 7ead5e58f..ed05dfa7b 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import datetime import re import socket @@ -25,10 +26,12 @@ from ..utils import ( try_get, urlencode_postdata, update_url_query, - lowercase_escape + lowercase_escape, + parse_iso8601 ) + class FacebookIE(InfoExtractor): _VALID_URL = r'''(?x) (?: @@ -451,14 +454,17 @@ class FacebookIE(InfoExtractor): self._search_regex(r'ownerName"\s*:\s*"([^"]+)"', webpage, 'uploader', default=None) or \ self._og_search_title(webpage, default=None) + if webpage.find('Paid Partnership'): + timestamp = self._search_regex( + r'datePublished":"(.+?)"', webpage, + 'timestamp', default=None) + timestamp = parse_iso8601(timestamp) + else: + timestamp = int_or_none( + self._search_regex(r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary,'timestamp', default=None) + or self._search_regex(r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None) + ) or int_or_none(self._search_regex(r'publish_time":([\d]+)', webpage, 'timestamp', default=None)) - timestamp = int_or_none(self._search_regex( - r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary, - 'timestamp', default=None) or self._search_regex( - r']+data-utime=["\'](\d+)', webpage, - 'timestamp', default=None)) or int_or_none(self._search_regex( - r'publish_time":([\d]+)', webpage, - 'timestamp', default=None)) uploader_id = self._search_regex( r'ownerid:"([\d]+)', webpage, @@ -631,7 +637,6 
@@ class FacebookIE(InfoExtractor): video_title = 'Facebook video #%s' % video_id return video_title - class FacebookTahoeData: def __init__(self, extractor, page, video_id): self._page = page