fix facebook thumbnail
This commit is contained in:
parent
b84d66e626
commit
a1986b5bf2
@ -1,5 +1,6 @@
|
|||||||
import unittest
|
import unittest
|
||||||
import youtube_dl
|
import youtube_dl
|
||||||
|
from youtube_dl.utils import DownloadError
|
||||||
|
|
||||||
|
|
||||||
class facebookMetaData(unittest.TestCase):
|
class facebookMetaData(unittest.TestCase):
|
||||||
@ -25,8 +26,15 @@ class facebookMetaData(unittest.TestCase):
|
|||||||
info = ydl.extract_info(url, download=False)
|
info = ydl.extract_info(url, download=False)
|
||||||
self.assertGreater(info.get('comment_count'), 0)
|
self.assertGreater(info.get('comment_count'), 0)
|
||||||
|
|
||||||
|
def test_meta_data(self):
    """Extract metadata for a public watch URL and require a positive comment count."""
    params = {}
    url = "https://www.facebook.com/watch?v=177407933624543/"
    ydl = youtube_dl.YoutubeDL(params)
    info = ydl.extract_info(url, download=False)
    comment_count = info.get('comment_count')
    # Check for a missing field first: comparing None with 0 raises
    # TypeError on Python 3 instead of producing a readable test failure.
    self.assertIsNotNone(comment_count)
    self.assertGreater(comment_count, 0)
|
||||||
|
|
||||||
def test_metadata_fetch_with_log_in(self):
|
def test_metadata_fetch_with_log_in(self):
|
||||||
url = "https://www.facebook.com/SerieA/videos/282581803097269"
|
url = "https://www.facebook.com/oristandup/videos/675360549895283"
|
||||||
params = {}
|
params = {}
|
||||||
with open("cookie_file") as file:
|
with open("cookie_file") as file:
|
||||||
proxy = "ec2-3-221-82-67.compute-1.amazonaws.com:3128"
|
proxy = "ec2-3-221-82-67.compute-1.amazonaws.com:3128"
|
||||||
@ -34,6 +42,8 @@ class facebookMetaData(unittest.TestCase):
|
|||||||
params['proxy'] = proxy
|
params['proxy'] = proxy
|
||||||
ydl = youtube_dl.YoutubeDL(params)
|
ydl = youtube_dl.YoutubeDL(params)
|
||||||
info = ydl.extract_info(url, download=False)
|
info = ydl.extract_info(url, download=False)
|
||||||
|
print (info.get('title'))
|
||||||
|
print (info.get('timestamp'))
|
||||||
self.assertTrue(info.get('timestamp'))
|
self.assertTrue(info.get('timestamp'))
|
||||||
self.assertTrue(info.get('view_count'))
|
self.assertTrue(info.get('view_count'))
|
||||||
self.assertTrue(info.get('comment_count'))
|
self.assertTrue(info.get('comment_count'))
|
||||||
@ -41,6 +51,19 @@ class facebookMetaData(unittest.TestCase):
|
|||||||
self.assertTrue(info.get('uploader_id'))
|
self.assertTrue(info.get('uploader_id'))
|
||||||
self.assertTrue(info.get('thumbnail'))
|
self.assertTrue(info.get('thumbnail'))
|
||||||
|
|
||||||
|
def test_unavailable_video(self):
    """Extraction of an unavailable video must raise DownloadError."""
    url = "https://www.facebook.com/101457238278830/videos/287839102599521/"
    params = {}
    with open("cookie_file") as file:
        proxy = "ec2-3-221-82-67.compute-1.amazonaws.com:3128"
        params['cookiefile'] = file.name
        params['proxy'] = proxy
        ydl = youtube_dl.YoutubeDL(params)
        # Use assertRaises as a context manager so the test FAILS when no
        # exception is raised. Calling self.assertRaises(DownloadError)
        # inside an `except` block asserts nothing at all.
        with self.assertRaises(DownloadError):
            ydl.extract_info(url, download=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -374,13 +374,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
if not video_data:
|
if not video_data:
|
||||||
if not fatal_if_no_video:
|
if not fatal_if_no_video:
|
||||||
return webpage, False
|
return webpage, False
|
||||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
self.validate_webpage(webpage)
|
||||||
if m_msg is not None:
|
|
||||||
raise ExtractorError(
|
|
||||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
|
||||||
expected=True)
|
|
||||||
elif '>You must log in to continue' in webpage:
|
|
||||||
self.raise_login_required()
|
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
info_dict = self.get_from_new_ui(webpage, tahoe_data, video_id)
|
info_dict = self.get_from_new_ui(webpage, tahoe_data, video_id)
|
||||||
@ -806,13 +800,42 @@ class FacebookIE(InfoExtractor):
|
|||||||
|
|
||||||
def _resolve_thumbnail(self, webpage, tahoe_data):
    """Return the video's thumbnail URL, or None when none can be found.

    The ``og:image`` / ``twitter:image`` meta tags of *webpage* are tried
    first. If they are absent, the full permalink page is fetched through
    ``resolve_full_webpage(tahoe_data)`` and its ``"thumbnailUrl"`` field
    is used instead.
    """
    thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)

    if not thumbnail:
        page = self.resolve_full_webpage(tahoe_data)
        if page:
            thumbnail = self._search_regex(
                r'"thumbnailUrl":"(.+?)"', page, 'thumbnail', fatal=False)

    if not thumbnail:
        # Do not stringify a missing value: str(None) would yield the
        # truthy text 'None' and be mistaken for a real URL.
        return None

    # Drop the backslashes used to escape slashes in the scraped value.
    return thumbnail.replace('\\', '')
|
||||||
|
|
||||||
def _valid_video_title(self, video_title):
    """Return True when *video_title* is usable as a real title.

    A title is rejected when it is empty/None or when it contains the
    placeholder text "log in or sign up to view" (matched
    case-insensitively). The result is always a bool.
    """
    if not video_title:
        return False
    return u'log in or sign up to view' not in video_title.lower()
|
||||||
|
|
||||||
|
def validate_webpage(self, webpage):
    """Raise if *webpage* is one of the recognised "no video here" pages.

    Checks, in order: an interstitial message block, the
    "Your Request Couldn't be Processed" notice, and the login prompt.
    Returns None when none of them is present.
    """
    interstitial = re.search(
        r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
    if interstitial is not None:
        reason = interstitial.group(1)
        raise ExtractorError(
            'The video is not available, Facebook said: "%s"' % reason,
            expected=True)

    if 'Your Request Couldn\'t be Processed' in webpage:
        raise ExtractorError(
            'The video is not available, Facebook said: this content is not available',
            expected=True)

    # Only reached when neither of the checks above raised.
    if '>You must log in to continue' in webpage:
        self.raise_login_required()
|
||||||
|
|
||||||
|
def resolve_full_webpage(self, tahoe_data):
    """Fetch the video's permalink page and return its content as text.

    The permalink is taken from the ``"permalinkURL"`` field found in
    ``tahoe_data.primary`` and requested with a desktop-browser
    User-Agent header.

    Returns an empty string when no permalink is present, so callers that
    run regexes over the result simply get no match.
    """
    # Local imports keep this method usable on both Python 2 and 3;
    # ``urllib2`` exists only on Python 2.
    try:
        from urllib.request import Request, urlopen
    except ImportError:
        from urllib2 import Request, urlopen
    from contextlib import closing

    full_url = self._search_regex(
        r'"permalinkURL":"(.+?)"', tahoe_data.primary, 'video_url', fatal=False)
    if not full_url:
        # Nothing to fetch; stringifying a missing value would request
        # the bogus URL 'None'.
        return ''
    # Drop the backslashes used to escape slashes in the scraped value.
    full_url = full_url.replace('\\', '')

    user_agent = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3'
    headers = {'User-Agent': user_agent}
    req = Request(full_url, None, headers)

    # closing() guarantees the response is released even if read() fails.
    with closing(urlopen(req)) as response:
        page = response.read()

    # On Python 3 read() yields bytes; decode so str regexes can be applied.
    if not isinstance(page, str):
        page = page.decode('utf-8', 'replace')
    return page
|
||||||
|
|
||||||
|
|
||||||
class FacebookTahoeData:
|
class FacebookTahoeData:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user