missing metadata fix (#319)

* missing metadata fix

* timestamp fix

Co-authored-by: bhodaya <bhodaya@videocites.com>
This commit is contained in:
hodayabu 2020-05-24 19:00:32 +03:00 committed by GitHub
parent cd13c98582
commit d6ae092fc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime
import re import re
import socket import socket
@ -25,10 +26,12 @@ from ..utils import (
try_get, try_get,
urlencode_postdata, urlencode_postdata,
update_url_query, update_url_query,
lowercase_escape lowercase_escape,
parse_iso8601
) )
class FacebookIE(InfoExtractor): class FacebookIE(InfoExtractor):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
(?: (?:
@ -451,14 +454,17 @@ class FacebookIE(InfoExtractor):
self._search_regex(r'ownerName"\s*:\s*"([^"]+)"', webpage, 'uploader', default=None) or \ self._search_regex(r'ownerName"\s*:\s*"([^"]+)"', webpage, 'uploader', default=None) or \
self._og_search_title(webpage, default=None) self._og_search_title(webpage, default=None)
if webpage.find('Paid Partnership'):
timestamp = self._search_regex(
r'datePublished":"(.+?)"', webpage,
'timestamp', default=None)
timestamp = parse_iso8601(timestamp)
else:
timestamp = int_or_none(
self._search_regex(r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary,'timestamp', default=None)
or self._search_regex(r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)
) or int_or_none(self._search_regex(r'publish_time&quot;:([\d]+)', webpage, 'timestamp', default=None))
timestamp = int_or_none(self._search_regex(
r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary,
'timestamp', default=None) or self._search_regex(
r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
'timestamp', default=None)) or int_or_none(self._search_regex(
r'publish_time&quot;:([\d]+)', webpage,
'timestamp', default=None))
uploader_id = self._search_regex( uploader_id = self._search_regex(
r'ownerid:"([\d]+)', webpage, r'ownerid:"([\d]+)', webpage,
@ -631,7 +637,6 @@ class FacebookIE(InfoExtractor):
video_title = 'Facebook video #%s' % video_id video_title = 'Facebook video #%s' % video_id
return video_title return video_title
class FacebookTahoeData: class FacebookTahoeData:
def __init__(self, extractor, page, video_id): def __init__(self, extractor, page, video_id):
self._page = page self._page = page