From 4bc774fe04f45eeeeeb7bbf7c99af84268ff293f Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Thu, 27 Apr 2017 13:13:08 +0200 Subject: [PATCH 1/4] [buzzfeed] extract more URLs --- youtube_dl/extractor/buzzfeed.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py index 75fa92d7c..a782c65c7 100644 --- a/youtube_dl/extractor/buzzfeed.py +++ b/youtube_dl/extractor/buzzfeed.py @@ -72,11 +72,17 @@ class BuzzFeedIE(InfoExtractor): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) + entries = [ + self.url_result(match[2]) + for match in re.findall( + r'''(?s)["'])js-placeholder-link''' + r'''(?P=q)\s+href\s*=\s*(?P["'])(.*?)(?P=r)''', webpage) + ] + all_buckets = re.findall( r'(?s)
Date: Thu, 27 Apr 2017 13:13:19 +0200 Subject: [PATCH 2/4] [facebook] extract more URLs --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index b69c1ede0..ada435b2e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -211,7 +211,7 @@ class FacebookIE(InfoExtractor): # Facebook API embed # see https://developers.facebook.com/docs/plugins/embedded-video-player - mobj = re.search(r'''(?x)]+ + mobj = re.search(r'''(?x)<(?:div|fb:post)[^>]+ class=(?P[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+ data-href=(?P[\'"])(?P(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage) if mobj is not None: From 93cf8bde94bc6dfceb1a9b85a220282eff5b8ca4 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Thu, 27 Apr 2017 13:17:08 +0200 Subject: [PATCH 3/4] [buzzfeed] update test cases --- youtube_dl/extractor/buzzfeed.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py index a782c65c7..a95b54b95 100644 --- a/youtube_dl/extractor/buzzfeed.py +++ b/youtube_dl/extractor/buzzfeed.py @@ -22,12 +22,13 @@ class BuzzFeedIE(InfoExtractor): 'id': 'aVCR29aE_OQ', 'ext': 'mp4', 'title': 'Angry Ram destroys a punching bag..', - 'description': 'md5:c59533190ef23fd4458a5e8c8c872345', + 'description': 'md5:8b3e34589d9b3e387dcb837098b859d8', 'upload_date': '20141024', 'uploader_id': 'Buddhanz1', 'uploader': 'Angry Ram', } - }] + }], + 'add_ie': ['Youtube'], }, { 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia', 'params': { @@ -48,7 +49,8 @@ class BuzzFeedIE(InfoExtractor): 'uploader_id': 'CindysMunchkin', 'uploader': 're:^Munchkin the', }, - }] + }], + 'add_ie': ['Youtube'], }, { 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK', 'info_dict': { @@ -57,12 +59,14 @@ class BuzzFeedIE(InfoExtractor): 'description': 'This gosling knows how to stick a landing.', }, 'playlist': [{ - 'md5': '763ca415512f91ca62e4621086900a23', + 'md5': '2ca4672b84a6a9ab24561c847c8b82dc', 'info_dict': { 'id': '971793786185728', 'ext': 'mp4', 'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...', 'uploader': 'Calgary Outdoor Centre-University of Calgary', + 'upload_date': '20150511', + 'timestamp': 1431380091, }, }], 'add_ie': ['Facebook'], From 10bf7493c83038123c31901f81bfd23913b1f031 Mon Sep 17 00:00:00 2001 From: Tithen-Firion Date: Thu, 27 Apr 2017 17:46:19 +0200 Subject: [PATCH 4/4] [buzzfeed] improve regex --- youtube_dl/extractor/buzzfeed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py index a95b54b95..9a34315cd 100644 --- a/youtube_dl/extractor/buzzfeed.py +++ b/youtube_dl/extractor/buzzfeed.py @@ -80,7 +80,7 @@ class BuzzFeedIE(InfoExtractor): self.url_result(match[2]) for match in re.findall( r'''(?s)["'])js-placeholder-link''' - r'''(?P=q)\s+href\s*=\s*(?P["'])(.*?)(?P=r)''', webpage) + r'''(?P=q)\s+href\s*=\s*(?P["'])(.+?)(?P=r)''', webpage) ] all_buckets = re.findall(