From e0fee7b2eeb571b00026677a79fb158af78674d0 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 17 Jun 2019 13:49:51 +0300 Subject: [PATCH 1/9] fix ffmpeg --- youtube_dl/downloader/external.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index acdb27712..1c382ca2e 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -243,8 +243,6 @@ class FFmpegFD(ExternalFD): # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] - args += self._configuration_args() - # start_time = info_dict.get('start_time') or 0 # if start_time: # args += ['-ss', compat_str(start_time)] @@ -311,6 +309,8 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_conn', conn] args += ['-i', url, '-c', 'copy'] + + args += self._configuration_args() if self.params.get('test', False): args += ['-fs', compat_str(self._TEST_FILE_SIZE)] From 549a91de290d7dcf4d8bf67b66bad62faa7be4be Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 17 Jun 2019 13:58:41 +0300 Subject: [PATCH 2/9] . --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c0c82859e..d35ae88f3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 'vc.2019.06.08' +__version__ = 'vc.2019.06.17' From 4a7e1afbf3719a9034bc34e1b636865f66a99e79 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Wed, 19 Jun 2019 13:54:08 +0300 Subject: [PATCH 3/9] using _og_search_title before using the description as a title --- youtube_dl/extractor/facebook.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index c4555142e..892341add 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -431,6 +431,8 @@ class FacebookIE(InfoExtractor): video_title = self._html_search_regex( r'(?s)(.*?)', webpage, 'alternative title', default=None) + if not video_title: + video_title = self._og_search_title(webpage, default=None) if not video_title: video_title = self._html_search_meta( 'description', webpage, 'title', default=None) From f6afa73ac4080561908e4118e141fda637796bd3 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Wed, 19 Jun 2019 13:54:53 +0300 Subject: [PATCH 4/9] using the ownerName regex before using _og_search_title to get the uploader name --- youtube_dl/extractor/facebook.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 892341add..cc233b651 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -440,12 +440,13 @@ class FacebookIE(InfoExtractor): video_title = limit_length(video_title, 80) else: video_title = 'Facebook video #%s' % video_id + uploader = clean_html(get_element_by_id( 'fbPhotoPageAuthorName', webpage)) or self._search_regex( r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',default=None) or \ - self._og_search_title(webpage, default=None) or self._search_regex( + self._search_regex( r'\"ownerName\":"(.+?)"', tahoe_data.secondary, - 'uploader_id', fatal=False) + 'uploader_id', fatal=False) or self._og_search_title(webpage, default=None) timestamp = int_or_none(self._search_regex( From ff8d873ffb3ec6a7cb9fd4640fada37a586b873b Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Wed, 19 Jun 2019 14:17:19 +0300 Subject: [PATCH 5/9] updating version --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d35ae88f3..a5def2a00 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 'vc.2019.06.17' +__version__ = 'vc.2019.06.19' From 46fc798e15c82dff75e0a3d50ac05361e63ed2c5 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Thu, 20 Jun 2019 13:03:30 +0300 Subject: [PATCH 6/9] extract shares and likes. --- youtube_dl/extractor/facebook.py | 47 ++++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index cc233b651..3ba2e648f 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -460,18 +460,12 @@ class FacebookIE(InfoExtractor): 'uploader_id', default=None) or self._search_regex( r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary, 'uploader_id', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) - view_count = parse_count(self._search_regex( - r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', - default=None) or self._search_regex( - r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count', - default=None) or self._search_regex( - r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', - default=None) or self._search_regex( - r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count', - default=None) - ) + view_count = parse_count(self._extract_meta_count(['postViewCount', 'viewCount'], webpage, tahoe_data, 'likes')) + likes_count = parse_count(self._extract_likes(webpage, tahoe_data)) + shares_count = parse_count(self._extract_meta_count(['sharecount'], webpage, tahoe_data, 'shares')) info_dict = { 'id': video_id, @@ -483,11 +477,42 @@ class FacebookIE(InfoExtractor): 'view_count': view_count, 'uploader_id': uploader_id, 'is_live': is_live, - 'live_status': live_status + 'live_status': live_status, + 'likes': likes_count, + 'shares': shares_count } return webpage, info_dict + def _extract_meta_count(self, fields, webpage, tahoe_data, name, ): + value = None + + for f in fields: + if value: + break + value = self._search_regex( + r'\b%s\s*:\s*["\']([\d,.]+)' % f, webpage, name, + default=None + ) + if value: + break + + value = self._search_regex( + r'[\'\"]%s[\'\"]\s*:\s*(\d+)' % f, tahoe_data.secondary, name, + default=None) + + return value + + def _extract_likes(self, webpage, tahoe_data): + values = re.findall(r'\blikecount\s*:\s*["\']([\d,.]+)', webpage) + if values: + return values[-1] + + + values = re.findall(r'[\'\"]\blikecount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary) + if values: + return values[-1] + def _real_extract(self, url): video_id = self._match_id(url) From a5456c043a1f1231ff98421252573b354e0c084f Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Thu, 20 Jun 2019 13:14:42 +0300 Subject: [PATCH 7/9] . --- youtube_dl/extractor/facebook.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 3ba2e648f..1b91c9036 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -478,8 +478,8 @@ class FacebookIE(InfoExtractor): 'uploader_id': uploader_id, 'is_live': is_live, 'live_status': live_status, - 'likes': likes_count, - 'shares': shares_count + 'like_count': likes_count, + 'share_count': shares_count } return webpage, info_dict From 77c07d1cc2b167e3a910e2dd2ca1d7269f4b5a4f Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Thu, 20 Jun 2019 14:41:51 +0300 Subject: [PATCH 8/9] get twitter views. --- youtube_dl/extractor/twitter.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 41d0b6be8..852589b65 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -14,6 +14,7 @@ from ..utils import ( remove_end, try_get, xpath_text, + parse_count ) from .periscope import PeriscopeIE @@ -165,6 +166,7 @@ class TwitterCardIE(TwitterBaseIE): config = None formats = [] duration = None + view_count = None urls = [url] if path.startswith('cards/'): @@ -265,6 +267,7 @@ class TwitterCardIE(TwitterBaseIE): title = 'Twitter web player' thumbnail = config.get('posterImage') duration = float_or_none(track.get('durationMs'), scale=1000) + view_count = parse_count(track.get('viewCount')) self._remove_duplicate_formats(formats) self._sort_formats(formats) @@ -275,6 +278,7 @@ class TwitterCardIE(TwitterBaseIE): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'view_count': view_count } From 3ba6ef6ffbd165497666c6f1614d4b9b8e7b966e Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Fri, 21 Jun 2019 00:19:24 +0300 Subject: [PATCH 9/9] save guest token. --- youtube_dl/extractor/common.py | 2 +- youtube_dl/extractor/twitter.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c3e9eec6..b411882e0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2816,7 +2816,7 @@ class InfoExtractor(object): """ Return a compat_cookies.SimpleCookie with the cookies for the url """ req = sanitized_Request(url) self._downloader.cookiejar.add_cookie_header(req) - return compat_cookies.SimpleCookie(req.get_header('Cookie')) + return compat_cookies.SimpleCookie(str(req.get_header('Cookie'))) def _apply_first_set_cookie_header(self, url_handle, cookie): """ diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py index 852589b65..fe9860b3c 100644 --- a/youtube_dl/extractor/twitter.py +++ b/youtube_dl/extractor/twitter.py @@ -241,12 +241,18 @@ class TwitterCardIE(TwitterBaseIE): ct0 = self._get_cookies(url).get('ct0') if ct0: headers['csrf_token'] = ct0.value - guest_token = self._download_json( - '%s/guest/activate.json' % self._API_BASE, video_id, - 'Downloading guest token', data=b'', - headers=headers)['guest_token'] + guest_token_c = self._get_cookies('http://api.twitter.com/').get('gt') + if not guest_token_c: + guest_token = self._download_json( + '%s/guest/activate.json' % self._API_BASE, video_id, + 'Downloading guest token', data=b'', + headers=headers)['guest_token'] + self._set_cookie('api.twitter.com', 'gt', guest_token) + else: + guest_token = guest_token_c.value + headers['x-guest-token'] = guest_token - self._set_cookie('api.twitter.com', 'gt', guest_token) + config = self._download_json( '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id), video_id, headers=headers)