Patch: youtube_dl vc.2019.06.08 -> vc.2019.06.19

Summary
  * downloader/external.py: the ffmpeg configuration args are appended after
    "-i <url> -c copy" instead of before the input options.
  * extractor/common.py: _get_cookies() hands SimpleCookie a str built from
    the Cookie header (empty string when the header is absent).
  * extractor/facebook.py: og:title becomes a title fallback and the last
    uploader fallback; view/share counts go through _extract_meta_count(),
    like counts through _extract_likes(); like_count and share_count are
    added to the info dict.
  * extractor/twitter.py: an existing "gt" cookie is reused as guest token
    before requesting a new one; view_count is read from track['viewCount'].
  * version.py: version bump.

NOTE(review): indentation and blank context lines were reconstructed from a
copy whose line breaks had been lost. Verify the context against the target
tree, or apply with whitespace-tolerant options.
NOTE(review): the regex in the context of the first facebook.py hunk
(r'(?s)(.*?)') looks truncated, possibly stripped markup. Confirm before
applying.
NOTE(review): "index" post-image hashes are carried over unchanged and were
not recomputed.

diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index acdb27712..1c382ca2e 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -243,8 +243,6 @@ class FFmpegFD(ExternalFD):
             # http://trac.ffmpeg.org/ticket/6125#comment:10
             args += ['-seekable', '1' if seekable else '0']
 
-        args += self._configuration_args()
-
         # start_time = info_dict.get('start_time') or 0
         # if start_time:
         #     args += ['-ss', compat_str(start_time)]
@@ -311,6 +309,8 @@ class FFmpegFD(ExternalFD):
                 args += ['-rtmp_conn', conn]
 
         args += ['-i', url, '-c', 'copy']
+
+        args += self._configuration_args()
 
         if self.params.get('test', False):
             args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9c3e9eec6..b411882e0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -2816,7 +2816,7 @@ class InfoExtractor(object):
         """ Return a compat_cookies.SimpleCookie with the cookies for the url """
         req = sanitized_Request(url)
         self._downloader.cookiejar.add_cookie_header(req)
-        return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+        return compat_cookies.SimpleCookie(str(req.get_header('Cookie') or ''))
 
     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index c4555142e..1b91c9036 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -431,6 +431,8 @@ class FacebookIE(InfoExtractor):
             video_title = self._html_search_regex(
                 r'(?s)(.*?)',
                 webpage, 'alternative title', default=None)
+        if not video_title:
+            video_title = self._og_search_title(webpage, default=None)
         if not video_title:
             video_title = self._html_search_meta(
                 'description', webpage, 'title', default=None)
@@ -438,12 +440,13 @@ class FacebookIE(InfoExtractor):
             video_title = limit_length(video_title, 80)
         else:
             video_title = 'Facebook video #%s' % video_id
+
         uploader = clean_html(get_element_by_id(
             'fbPhotoPageAuthorName', webpage)) or self._search_regex(
             r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',default=None) or \
-            self._og_search_title(webpage, default=None) or self._search_regex(
+            self._search_regex(
             r'\"ownerName\":"(.+?)"', tahoe_data.secondary,
-            'uploader_id', fatal=False)
+            'uploader', default=None) or self._og_search_title(webpage, default=None)
 
 
         timestamp = int_or_none(self._search_regex(
@@ -457,18 +460,12 @@ class FacebookIE(InfoExtractor):
             'uploader_id', default=None) or self._search_regex(
             r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary,
             'uploader_id', fatal=False)
+
         thumbnail = self._og_search_thumbnail(webpage)
 
-        view_count = parse_count(self._search_regex(
-            r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-            default=None) or self._search_regex(
-            r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count',
-            default=None) or self._search_regex(
-            r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
-            default=None) or self._search_regex(
-            r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count',
-            default=None)
-        )
+        view_count = parse_count(self._extract_meta_count(['postViewCount', 'viewCount'], webpage, tahoe_data, 'view count'))
+        likes_count = parse_count(self._extract_likes(webpage, tahoe_data))
+        shares_count = parse_count(self._extract_meta_count(['sharecount'], webpage, tahoe_data, 'shares'))
 
         info_dict = {
             'id': video_id,
@@ -480,11 +477,47 @@ class FacebookIE(InfoExtractor):
             'view_count': view_count,
             'uploader_id': uploader_id,
             'is_live': is_live,
-            'live_status': live_status
+            'live_status': live_status,
+            'like_count': likes_count,
+            'share_count': shares_count,
         }
 
         return webpage, info_dict
 
+    def _extract_meta_count(self, fields, webpage, tahoe_data, name):
+        """
+        Return the first raw count found for any of the given fields
+
+        For each field name, in order, the webpage is searched for a
+        quoted value (field: "1,234") and then tahoe_data.secondary for a
+        bare integer ("field": 1234). The matched string is returned
+        unparsed, or None when no field matches anywhere.
+        """
+        for field in fields:
+            value = self._search_regex(
+                r'\b%s\s*:\s*["\']([\d,.]+)' % field, webpage, name,
+                default=None
+            ) or self._search_regex(
+                r'[\'\"]%s[\'\"]\s*:\s*(\d+)' % field,
+                tahoe_data.secondary, name, default=None)
+            if value:
+                return value
+        return None
+
+    def _extract_likes(self, webpage, tahoe_data):
+        """
+        Return the last raw like count found, or None
+
+        The webpage is searched first; tahoe_data.secondary is only
+        consulted when the webpage has no likecount entry.
+        """
+        values = re.findall(r'\blikecount\s*:\s*["\']([\d,.]+)', webpage)
+        if not values:
+            values = re.findall(
+                r'[\'\"]\blikecount[\'\"]\s*:\s*(\d+)',
+                tahoe_data.secondary)
+        return values[-1] if values else None
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
index 41d0b6be8..fe9860b3c 100644
--- a/youtube_dl/extractor/twitter.py
+++ b/youtube_dl/extractor/twitter.py
@@ -14,6 +14,7 @@ from ..utils import (
     remove_end,
     try_get,
     xpath_text,
+    parse_count,
 )
 
 from .periscope import PeriscopeIE
@@ -165,6 +166,7 @@ class TwitterCardIE(TwitterBaseIE):
         config = None
         formats = []
         duration = None
+        view_count = None
 
         urls = [url]
         if path.startswith('cards/'):
@@ -239,12 +241,18 @@ class TwitterCardIE(TwitterBaseIE):
             ct0 = self._get_cookies(url).get('ct0')
             if ct0:
                 headers['csrf_token'] = ct0.value
-            guest_token = self._download_json(
-                '%s/guest/activate.json' % self._API_BASE, video_id,
-                'Downloading guest token', data=b'',
-                headers=headers)['guest_token']
+            guest_token_c = self._get_cookies('http://api.twitter.com/').get('gt')
+            if not guest_token_c:
+                guest_token = self._download_json(
+                    '%s/guest/activate.json' % self._API_BASE, video_id,
+                    'Downloading guest token', data=b'',
+                    headers=headers)['guest_token']
+                self._set_cookie('api.twitter.com', 'gt', guest_token)
+            else:
+                guest_token = guest_token_c.value
+
             headers['x-guest-token'] = guest_token
-            self._set_cookie('api.twitter.com', 'gt', guest_token)
+
             config = self._download_json(
                 '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
                 video_id, headers=headers)
@@ -265,6 +273,7 @@ class TwitterCardIE(TwitterBaseIE):
             title = 'Twitter web player'
             thumbnail = config.get('posterImage')
             duration = float_or_none(track.get('durationMs'), scale=1000)
+            view_count = parse_count(track.get('viewCount'))
 
         self._remove_duplicate_formats(formats)
         self._sort_formats(formats)
@@ -275,6 +284,7 @@ class TwitterCardIE(TwitterBaseIE):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
+            'view_count': view_count,
         }
 
 
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index c0c82859e..a5def2a00 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = 'vc.2019.06.08'
+__version__ = 'vc.2019.06.19'