From 906224637111a00e2b8012b79a8bdb965b285130 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 10:27:20 +0200 Subject: [PATCH 1/3] Relax regex. --- youtube_dl/extractor/facebook.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 9e306d0d8..ef195a891 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -466,7 +466,7 @@ class FacebookIE(InfoExtractor): uploader_id = self._search_regex( r'ownerid:"([\d]+)', webpage, 'uploader_id', default=None) or self._search_regex( - r'\"ownerid\":"(\d+)"', tahoe_secondary_data, + r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_secondary_data, 'uploader_id', fatal=False) thumbnail = self._og_search_thumbnail(webpage) @@ -474,11 +474,11 @@ class FacebookIE(InfoExtractor): view_count = parse_count(self._search_regex( r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', default=None) or self._search_regex( - r'\"postViewCount\"\s*:\s*(\d+)', tahoe_secondary_data, 'view count', + r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_secondary_data, 'view count', default=None) or self._search_regex( r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', default=None) or self._search_regex( - r'\"viewCount\"\s*:\s*(\d+)', tahoe_secondary_data, 'view count', + r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_secondary_data, 'view count', default=None) ) From 32fe6908c973a7977c208919385b3588b41935db Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 10:33:16 +0200 Subject: [PATCH 2/3] Do not fail if secondary data does not exist.
--- youtube_dl/extractor/facebook.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index ef195a891..2318cf4c7 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -354,7 +354,7 @@ class FacebookIE(InfoExtractor): video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) - tahoe_secondary_data = '' + if not video_data: if not fatal_if_no_video: return webpage, False @@ -396,7 +396,9 @@ class FacebookIE(InfoExtractor): data=tahoe_request_data, headers=tahoe_request_headers, fatal=False ) - + if not tahoe_secondary_data: + tahoe_secondary_data = '' + tahoe_js_data = self._parse_json( self._search_regex( r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_primary_data, From cd805c064c4251856adc4e1c47379dcfee74dc88 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 10:33:40 +0200 Subject: [PATCH 3/3] remove whitespace. --- youtube_dl/extractor/facebook.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 2318cf4c7..ec2a6a1be 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -354,7 +354,6 @@ class FacebookIE(InfoExtractor): video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) - if not video_data: if not fatal_if_no_video: return webpage, False @@ -398,7 +397,7 @@ class FacebookIE(InfoExtractor): ) if not tahoe_secondary_data: tahoe_secondary_data = '' - + tahoe_js_data = self._parse_json( self._search_regex( r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_primary_data,