From 825e26a81449de07e5deef0a6d71e1977d18e5cd Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Tue, 25 Dec 2018 10:44:00 +0200 Subject: [PATCH 01/43] Handle error message for vk --- youtube_dl/extractor/vk.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index b52d15ac6..e0d691e25 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -298,7 +298,13 @@ class VKIE(VKBaseIE): # The video is not available in your region. 'url': 'https://vk.com/video-51812607_171445436', 'only_matching': True, - }] + }, + { + # Video %s is not available. + 'url': 'https://vk.com/video-173478245_456239188', + 'only_matching': True, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -361,6 +367,9 @@ class VKIE(VKBaseIE): r'The video .+? is not available in your region.': 'Video %s is not available in your region.', + + r'The video .+? is unavailable': + 'Video %s is not available.', } for error_re, error_msg in ERRORS.items(): From cc793fcab5b9fb07d47d2ac80a0bcd4c723298e8 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Thu, 27 Dec 2018 23:43:13 +0200 Subject: [PATCH 02/43] fix ok slowness. fix upload date extraction --- youtube_dl/extractor/odnoklassniki.py | 30 +++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 190d8af4d..d52159da1 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -127,8 +127,8 @@ class OdnoklassnikiIE(InfoExtractor): 'http://ok.ru/video/%s' % video_id, video_id) error = self._search_regex( - r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<', - webpage, 'error', default=None) + r'
(?P.*?)<\/div>', + webpage, name='error',group='error', default=None) if error: raise ExtractorError(error, expected=True) @@ -172,6 +172,32 @@ class OdnoklassnikiIE(InfoExtractor): upload_date = unified_strdate(self._html_search_meta( 'ya:ovs:upload_date', webpage, 'upload date', default=None)) + if upload_date is None: + upload_date_str = self._search_regex( + r'vp-layer-info_date">(?P.*?)<\/span>', + webpage, 'upload date', group='date') + if upload_date_str: + from datetime import datetime + upload_date_time = None + try: + upload_date_time = datetime.strptime(upload_date_str, '%d %b %Y') + except: + pass + try: + upload_date_time = datetime.strptime(upload_date_str, '%d %b') + upload_date_time = upload_date_time.replace(year=datetime.utcnow().year) + except: + pass + try: + upload_date_time = datetime.strptime(upload_date_str, '%H:%M') + upload_date_time = upload_date_time.replace(year=datetime.utcnow().year) + upload_date_time = upload_date_time.replace(day=datetime.utcnow().day) + except: + pass + + if upload_date_time: + upload_date = upload_date_time.strftime('%Y%m%d') + age_limit = None adult = self._html_search_meta( 'ya:ovs:adult', webpage, 'age limit', default=None) From 89acfecb6898b35f8dbb7b3e697aa81089270d75 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Thu, 27 Dec 2018 23:57:31 +0200 Subject: [PATCH 03/43] fix yestarday videos. --- youtube_dl/extractor/odnoklassniki.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index d52159da1..a3785bb62 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -177,7 +177,7 @@ class OdnoklassnikiIE(InfoExtractor): r'vp-layer-info_date">(?P.*?)<\/span>', webpage, 'upload date', group='date') if upload_date_str: - from datetime import datetime + from datetime import datetime, timedelta upload_date_time = None try: upload_date_time = datetime.strptime(upload_date_str, '%d %b %Y') @@ -189,9 +189,15 @@ class OdnoklassnikiIE(InfoExtractor): except: pass try: - upload_date_time = datetime.strptime(upload_date_str, '%H:%M') + if upload_date_str.find(':') >=0: + hour_and_minutes = upload_date_str.split(' ')[-1] + else: + hour_and_minutes = upload_date_str + upload_date_time = datetime.strptime(hour_and_minutes, '%H:%M') upload_date_time = upload_date_time.replace(year=datetime.utcnow().year) upload_date_time = upload_date_time.replace(day=datetime.utcnow().day) + if upload_date_str.find('yesterday') ==0: + upload_date_time = upload_date_time - timedelta(days=1) except: pass From dfeed7fa42dc3aafd41cd9fae2f26b1a4631275c Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 30 Dec 2018 22:25:04 +0200 Subject: [PATCH 04/43] add another date option --- youtube_dl/extractor/odnoklassniki.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index a3785bb62..1dbda0ef2 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -188,6 +188,12 @@ class OdnoklassnikiIE(InfoExtractor): upload_date_time = upload_date_time.replace(year=datetime.utcnow().year) except: pass + try: + upload_date_time = datetime.strptime(upload_date_str, '%d %B') + upload_date_time = upload_date_time.replace(year=datetime.utcnow().year) + except: + pass + try: if upload_date_str.find(':') >=0: hour_and_minutes = upload_date_str.split(' ')[-1] From facff120016831d481acce333d239edca2f6eafd Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 30 Dec 2018 22:36:35 +0200 Subject: [PATCH 05/43] lets go wild --- youtube_dl/extractor/odnoklassniki.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 1dbda0ef2..621e9d497 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -177,6 +177,7 @@ class OdnoklassnikiIE(InfoExtractor): r'vp-layer-info_date">(?P.*?)<\/span>', webpage, 'upload date', group='date') if upload_date_str: + upload_date_str = upload_date_str.replace('Sept', 'Sep') from datetime import datetime, timedelta upload_date_time = None try: From 4303495ee80833ad7e3d2389a94e2db317ba44e6 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 14 Jan 2019 13:29:24 +0200 Subject: [PATCH 06/43] Facebook - get timestamp from tahoe if missing. --- youtube_dl/extractor/facebook.py | 37 +++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 74954049d..05ea8a473 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -57,7 +57,8 @@ class FacebookIE(InfoExtractor): _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' - _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary' + _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=%s' + _TESTS = [{ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', @@ -222,6 +223,10 @@ class FacebookIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # no timestamp + 'url': 'https://www.facebook.com/ChickenShow1996/videos/2289288568020072/', + 'only_matching': True, }] @staticmethod @@ -339,6 +344,7 @@ class FacebookIE(InfoExtractor): video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) + tahoe_secondary_data = '' if not video_data: if not fatal_if_no_video: return webpage, False @@ -352,9 +358,7 @@ class FacebookIE(InfoExtractor): # Video info not in first request, do a secondary request using # tahoe player specific URL - tahoe_data = self._download_webpage( - self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id, - data=urlencode_postdata({ + tahoe_request_data = urlencode_postdata({ '__a': 1, '__pc': self._search_regex( r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage, @@ -365,15 +369,29 @@ class FacebookIE(InfoExtractor): 'fb_dtsg': self._search_regex( r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"', webpage, 'dtsg token', default=''), - }), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', }) + tahoe_request_headers = { + 'Content-Type': 'application/x-www-form-urlencoded', + } + + tahoe_primary_data = self._download_webpage( + self._VIDEO_PAGE_TAHOE_TEMPLATE % (video_id, 'primary'), video_id, + data=tahoe_request_data, + headers=tahoe_request_headers + ) + + tahoe_secondary_data = self._download_webpage( + self._VIDEO_PAGE_TAHOE_TEMPLATE % (video_id, 'secondary'), video_id, + data=tahoe_request_data, + headers=tahoe_request_headers + ) + tahoe_js_data = self._parse_json( self._search_regex( - r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data, + r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_primary_data, 'tahoe js data', default='{}'), video_id, fatal=False) + video_data = extract_from_jsmods_instances(tahoe_js_data) if not video_data: @@ -427,7 +445,10 @@ class FacebookIE(InfoExtractor): fatal=False) or self._og_search_title(webpage, fatal=False) timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, + 'timestamp', default=None) or self._search_regex( + r'data-utime=\\\"(\d+)\\\"', tahoe_secondary_data, 'timestamp', default=None)) + thumbnail = self._og_search_thumbnail(webpage) view_count = parse_count(self._search_regex( From e921ad845d29ed5a192dc6910b2457372ee27ec1 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 14 Jan 2019 13:37:15 +0200 Subject: [PATCH 07/43] flake8 fixes --- youtube_dl/extractor/facebook.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 05ea8a473..b1847dd21 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -59,7 +59,6 @@ class FacebookIE(InfoExtractor): _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=%s' - _TESTS = [{ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'md5': '6a40d33c0eccbb1af76cf0485a052659', @@ -358,7 +357,8 @@ class FacebookIE(InfoExtractor): # Video info not in first request, do a secondary request using # tahoe player specific URL - tahoe_request_data = urlencode_postdata({ + tahoe_request_data = urlencode_postdata( + { '__a': 1, '__pc': self._search_regex( r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage, @@ -371,8 +371,8 @@ class FacebookIE(InfoExtractor): webpage, 'dtsg token', default=''), }) tahoe_request_headers = { - 'Content-Type': 'application/x-www-form-urlencoded', - } + 'Content-Type': 'application/x-www-form-urlencoded', + } tahoe_primary_data = self._download_webpage( self._VIDEO_PAGE_TAHOE_TEMPLATE % (video_id, 'primary'), video_id, From 5916a2fc38a505c06c8dbb585a06b6bd5fe324df Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 14 Jan 2019 16:05:59 +0200 Subject: [PATCH 08/43] read uploader id --- youtube_dl/extractor/facebook.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index b1847dd21..d70837402 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -449,6 +449,12 @@ class FacebookIE(InfoExtractor): r'data-utime=\\\"(\d+)\\\"', tahoe_secondary_data, 'timestamp', default=None)) + uploader_id = self._search_regex( + r'ownerid:"([\d]+)', webpage, + 'uploader_id', default=None) or self._search_regex( + r'\"ownerid\":"(\d+)"', tahoe_secondary_data, + 'uploader_id', default=None) + thumbnail = self._og_search_thumbnail(webpage) view_count = parse_count(self._search_regex( @@ -463,6 +469,7 @@ class FacebookIE(InfoExtractor): 'timestamp': timestamp, 'thumbnail': thumbnail, 'view_count': view_count, + 'uploader_id': uploader_id } return webpage, info_dict From ba3b2c535f653f42e6c4d2d86ea172673d46f105 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 14 Jan 2019 16:51:44 +0200 Subject: [PATCH 09/43] read viewcount properly. --- youtube_dl/extractor/facebook.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index d70837402..f7b65a2ae 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -458,8 +458,11 @@ class FacebookIE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) view_count = parse_count(self._search_regex( - r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', - default=None)) + r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', + default=None) or self._search_regex( + r'\"postViewCount\"\s*:\s*(\d+)', tahoe_secondary_data, 'view count', + default=None) + ) info_dict = { 'id': video_id, From 8b50d432121ce659460715c97b8d4cc8d0b76e3f Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 14 Jan 2019 16:56:04 +0200 Subject: [PATCH 10/43] read viewCount when postViewCount is not available --- youtube_dl/extractor/facebook.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f7b65a2ae..028182280 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -461,6 +461,10 @@ class FacebookIE(InfoExtractor): r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', default=None) or self._search_regex( r'\"postViewCount\"\s*:\s*(\d+)', tahoe_secondary_data, 'view count', + default=None) or self._search_regex( + r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', + default=None) or self._search_regex( + r'\"viewCount\"\s*:\s*(\d+)', tahoe_secondary_data, 'view count', default=None) ) From 8811f87a76dc9c7a397bbe5ef8480fdd55b570db Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 14 Jan 2019 17:26:07 +0200 Subject: [PATCH 11/43] get uploader for tahoe data --- youtube_dl/extractor/facebook.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 028182280..4fdb75080 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -442,7 +442,9 @@ class FacebookIE(InfoExtractor): uploader = clean_html(get_element_by_id( 'fbPhotoPageAuthorName', webpage)) or self._search_regex( r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', - fatal=False) or self._og_search_title(webpage, fatal=False) + fatal=False) or self._og_search_title(webpage, fatal=False, default=None) or self._search_regex( + r'\"ownerName\":"(.*?)"', tahoe_secondary_data, + 'uploader_id') timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None) or self._search_regex( From f851a409e05a8260a53c8e9a1f2f11d76df62ba3 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 14 Jan 2019 17:37:09 +0200 Subject: [PATCH 12/43] hide unable to extract uploader warning --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 4fdb75080..d12948b06 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -442,7 +442,7 @@ class FacebookIE(InfoExtractor): uploader = clean_html(get_element_by_id( 'fbPhotoPageAuthorName', webpage)) or self._search_regex( r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', - fatal=False) or self._og_search_title(webpage, fatal=False, default=None) or self._search_regex( + fatal=False, default=None) or self._og_search_title(webpage, fatal=False, default=None) or self._search_regex( r'\"ownerName\":"(.*?)"', tahoe_secondary_data, 'uploader_id') timestamp = int_or_none(self._search_regex( From f8ae9b34da3e71f2b15f3b1e3b86616789cc5b59 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Wed, 16 Jan 2019 10:41:57 +0200 Subject: [PATCH 13/43] apply comments --- youtube_dl/extractor/facebook.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index d12948b06..9e306d0d8 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -224,8 +224,19 @@ class FacebookIE(InfoExtractor): }, }, { # no timestamp - 'url': 'https://www.facebook.com/ChickenShow1996/videos/2289288568020072/', - 'only_matching': True, + 'url': 'https://www.facebook.com/SuperNewsGames/videos/642255722780473/', + 'info_dict': { + 'timestamp': 1521221400, + 'uploader': 'Super News Games', + 'uploader_id': '229550157384367', + 'id': '642255722780473', + 'ext': 'mp4', + 'upload_date': '20180316', + 'title': 'The Voice of Nick is trying Fortnite after 100 hours of PLAYERUNKNOWN\'S BATTL...', + }, + 'params': { + 'skip_download': True, + }, }] @staticmethod @@ -383,7 +394,7 @@ class FacebookIE(InfoExtractor): tahoe_secondary_data = self._download_webpage( self._VIDEO_PAGE_TAHOE_TEMPLATE % (video_id, 'secondary'), video_id, data=tahoe_request_data, - headers=tahoe_request_headers + headers=tahoe_request_headers, fatal=False ) tahoe_js_data = self._parse_json( @@ -441,10 +452,11 @@ class FacebookIE(InfoExtractor): video_title = 'Facebook video #%s' % video_id uploader = clean_html(get_element_by_id( 'fbPhotoPageAuthorName', webpage)) or self._search_regex( - r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', - fatal=False, default=None) or self._og_search_title(webpage, fatal=False, default=None) or self._search_regex( - r'\"ownerName\":"(.*?)"', tahoe_secondary_data, - 'uploader_id') + r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',default=None) or \ + self._og_search_title(webpage, default=None) or self._search_regex( + r'\"ownerName\":"(.+?)"', tahoe_secondary_data, + 'uploader_id', fatal=False) + timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None) or self._search_regex( @@ -455,7 +467,7 @@ class FacebookIE(InfoExtractor): r'ownerid:"([\d]+)', webpage, 'uploader_id', default=None) or self._search_regex( r'\"ownerid\":"(\d+)"', tahoe_secondary_data, - 'uploader_id', default=None) + 'uploader_id', fatal=False) thumbnail = self._og_search_thumbnail(webpage) From 1ab0f7a742af9cb6061424271d167be993e4ffd4 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Wed, 16 Jan 2019 11:34:16 +0200 Subject: [PATCH 14/43] update version --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5ba61f489..3671a1fd3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.01.10' +__version__ = 'vc.2019.01.16' From 341cc7125b24a6921c04f9dbc7a3feb64acfd8d2 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Wed, 16 Jan 2019 12:31:43 +0200 Subject: [PATCH 15/43] Raise error in case of paid videos. --- youtube_dl/extractor/odnoklassniki.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 190d8af4d..3a5476e58 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -115,7 +115,12 @@ class OdnoklassnikiIE(InfoExtractor): }, { 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#', 'only_matching': True, - }] + }, { + # Paid video + 'url': 'https://ok.ru/video/954886983203', + 'only_matching': True, + } + ] def _real_extract(self, url): start_time = int_or_none(compat_parse_qs( @@ -153,6 +158,10 @@ class OdnoklassnikiIE(InfoExtractor): video_id, 'Downloading metadata JSON', data=urlencode_postdata(data)) + paymentInfo = metadata.get('paymentInfo') + if paymentInfo: + raise ExtractorError('This is Paid video. you need to subscribe in order to watch it', expected=True) + movie = metadata['movie'] # Some embedded videos may not contain title in movie dict (e.g. From 84d83668463f5132831d650d5290658b135cbe0b Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Thu, 17 Jan 2019 15:43:27 +0200 Subject: [PATCH 16/43] fetch vimeo views --- youtube_dl/extractor/vimeo.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index fd37f919b..3415ca597 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -353,6 +353,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'timestamp': 1324343742, 'upload_date': '20111220', 'description': 'md5:ae23671e82d05415868f7ad1aec21147', + 'view_count': int, }, }, { @@ -641,12 +642,17 @@ class VimeoIE(VimeoBaseInfoExtractor): 'timestamp', default=None) try: - view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count')) + # When userInteractionCount does not exist views is 0 + view_count = int_or_none( + self._search_regex( + r'"interactionType":"http:\/\/schema\.org\/WatchAction","userInteractionCount":(.+?)}', + webpage, 'view count', default=0 + ) + ) like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count')) comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count')) except RegexNotFoundError: # This info is only available in vimeo.com/{id} urls - view_count = None like_count = None comment_count = None From 0e3e9bd43519b2c0950bd6df4747540be98c28c1 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 10:08:21 +0200 Subject: [PATCH 17/43] Handle paid video only if formats is missing. --- youtube_dl/extractor/odnoklassniki.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 3a5476e58..11f7f9dc3 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -158,10 +158,6 @@ class OdnoklassnikiIE(InfoExtractor): video_id, 'Downloading metadata JSON', data=urlencode_postdata(data)) - paymentInfo = metadata.get('paymentInfo') - if paymentInfo: - raise ExtractorError('This is Paid video. you need to subscribe in order to watch it', expected=True) - movie = metadata['movie'] # Some embedded videos may not contain title in movie dict (e.g. @@ -255,5 +251,10 @@ class OdnoklassnikiIE(InfoExtractor): self._sort_formats(formats) + if not formats: + payment_info = metadata.get('paymentInfo') + if payment_info: + raise ExtractorError('This video is paid, subscribe to download it', expected=True) + info['formats'] = formats return info From 541557124dae1613aee1f10210103f731205746f Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 10:12:46 +0200 Subject: [PATCH 18/43] Aplly comment --- youtube_dl/extractor/odnoklassniki.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 11f7f9dc3..28771bb7f 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -249,12 +249,11 @@ class OdnoklassnikiIE(InfoExtractor): 'ext': 'flv', }) - self._sort_formats(formats) - if not formats: payment_info = metadata.get('paymentInfo') if payment_info: raise ExtractorError('This video is paid, subscribe to download it', expected=True) + self._sort_formats(formats) info['formats'] = formats return info From 4045fa9d9a908588ca10493aa12b3a409cfc8c6c Mon Sep 17 00:00:00 2001 From: Sergey M Date: Sun, 20 Jan 2019 15:14:04 +0700 Subject: [PATCH 19/43] Update odnoklassniki.py --- youtube_dl/extractor/odnoklassniki.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 28771bb7f..114b93c07 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -119,8 +119,7 @@ class OdnoklassnikiIE(InfoExtractor): # Paid video 'url': 'https://ok.ru/video/954886983203', 'only_matching': True, - } - ] + }] def _real_extract(self, url): start_time = int_or_none(compat_parse_qs( @@ -255,5 +254,6 @@ class OdnoklassnikiIE(InfoExtractor): raise ExtractorError('This video is paid, subscribe to download it', expected=True) self._sort_formats(formats) + info['formats'] = formats return info From 906224637111a00e2b8012b79a8bdb965b285130 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 10:27:20 +0200 Subject: [PATCH 20/43] Relax regex. --- youtube_dl/extractor/facebook.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 9e306d0d8..ef195a891 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -466,7 +466,7 @@ class FacebookIE(InfoExtractor): uploader_id = self._search_regex( r'ownerid:"([\d]+)', webpage, 'uploader_id', default=None) or self._search_regex( - r'\"ownerid\":"(\d+)"', tahoe_secondary_data, + r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_secondary_data, 'uploader_id', fatal=False) thumbnail = self._og_search_thumbnail(webpage) @@ -474,11 +474,11 @@ class FacebookIE(InfoExtractor): view_count = parse_count(self._search_regex( r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', default=None) or self._search_regex( - r'\"postViewCount\"\s*:\s*(\d+)', tahoe_secondary_data, 'view count', + r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_secondary_data, 'view count', default=None) or self._search_regex( r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', default=None) or self._search_regex( - r'\"viewCount\"\s*:\s*(\d+)', tahoe_secondary_data, 'view count', + r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_secondary_data, 'view count', default=None) ) From 32fe6908c973a7977c208919385b3588b41935db Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 10:33:16 +0200 Subject: [PATCH 21/43] Do not fail in case secondary data not exists. --- youtube_dl/extractor/facebook.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index ef195a891..2318cf4c7 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -354,7 +354,7 @@ class FacebookIE(InfoExtractor): video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) - tahoe_secondary_data = '' + if not video_data: if not fatal_if_no_video: return webpage, False @@ -396,7 +396,9 @@ class FacebookIE(InfoExtractor): data=tahoe_request_data, headers=tahoe_request_headers, fatal=False ) - + if not tahoe_secondary_data: + tahoe_secondary_data = '' + tahoe_js_data = self._parse_json( self._search_regex( r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_primary_data, From cd805c064c4251856adc4e1c47379dcfee74dc88 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 10:33:40 +0200 Subject: [PATCH 22/43] remove whitespace. --- youtube_dl/extractor/facebook.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 2318cf4c7..ec2a6a1be 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -354,7 +354,6 @@ class FacebookIE(InfoExtractor): video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) - if not video_data: if not fatal_if_no_video: return webpage, False @@ -398,7 +397,7 @@ class FacebookIE(InfoExtractor): ) if not tahoe_secondary_data: tahoe_secondary_data = '' - + tahoe_js_data = self._parse_json( self._search_regex( r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_primary_data, From 61ee81c06b8a55581d478d7432331dd7eb0a3cfc Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 20 Jan 2019 11:10:36 +0200 Subject: [PATCH 23/43] v --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 362265964..444ec7009 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,5 +1,5 @@ from __future__ import unicode_literals -__version__ = 'vc.2019.01.17' +__version__ = 'vc.2019.01.20' From f6c212489cd6a85176a31bd936560e8339e53d06 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Tue, 22 Jan 2019 12:46:57 +0200 Subject: [PATCH 24/43] . --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 444ec7009..5fd1c8ac4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,5 +1,5 @@ from __future__ import unicode_literals -__version__ = 'vc.2019.01.20' +__version__ = 'vc.2019.01.22' From 65f64270cdcb14c1699c4091a33ce1270579bf5c Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Mon, 28 Jan 2019 12:28:22 +0200 Subject: [PATCH 25/43] set default value for tahoe secondary data. --- youtube_dl/extractor/facebook.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index ec2a6a1be..e4d7ec235 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -354,6 +354,7 @@ class FacebookIE(InfoExtractor): video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) + tahoe_secondary_data = '' if not video_data: if not fatal_if_no_video: return webpage, False From 2809c0e622a0dc2ae7cac1969b016ad7ae9dc00a Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 3 Feb 2019 12:00:09 +0200 Subject: [PATCH 26/43] refactor tahoe data --- youtube_dl/extractor/facebook.py | 94 +++++++++++++++++++------------- 1 file changed, 55 insertions(+), 39 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index e4d7ec235..66e99fdf1 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -354,7 +354,7 @@ class FacebookIE(InfoExtractor): video_id, transform_source=js_to_json, fatal=False) video_data = extract_from_jsmods_instances(server_js_data) - tahoe_secondary_data = '' + tahoe_data = FacebookTahoeData(self, webpage, video_id) if not video_data: if not fatal_if_no_video: return webpage, False @@ -365,43 +365,11 @@ class FacebookIE(InfoExtractor): expected=True) elif '>You must log in to continue' in webpage: self.raise_login_required() - # Video info not in first request, do a secondary request using # tahoe player specific URL - tahoe_request_data = urlencode_postdata( - { - '__a': 1, - '__pc': self._search_regex( - r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage, - 'pkg cohort', default='PHASED:DEFAULT'), - '__rev': self._search_regex( - r'client_revision["\']\s*:\s*(\d+),', webpage, - 'client revision', default='3944515'), - 'fb_dtsg': self._search_regex( - r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"', - webpage, 'dtsg token', default=''), - }) - tahoe_request_headers = { - 'Content-Type': 'application/x-www-form-urlencoded', - } - - tahoe_primary_data = self._download_webpage( - self._VIDEO_PAGE_TAHOE_TEMPLATE % (video_id, 'primary'), video_id, - data=tahoe_request_data, - headers=tahoe_request_headers - ) - - tahoe_secondary_data = self._download_webpage( - self._VIDEO_PAGE_TAHOE_TEMPLATE % (video_id, 'secondary'), video_id, - data=tahoe_request_data, - headers=tahoe_request_headers, fatal=False - ) - if not tahoe_secondary_data: - tahoe_secondary_data = '' - tahoe_js_data = self._parse_json( self._search_regex( - r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_primary_data, + r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data.primary, 'tahoe js data', default='{}'), video_id, fatal=False) @@ -456,19 +424,19 @@ class FacebookIE(InfoExtractor): 'fbPhotoPageAuthorName', webpage)) or self._search_regex( r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',default=None) or \ self._og_search_title(webpage, default=None) or self._search_regex( - r'\"ownerName\":"(.+?)"', tahoe_secondary_data, + r'\"ownerName\":"(.+?)"', tahoe_data.secondary, 'uploader_id', fatal=False) timestamp = int_or_none(self._search_regex( r']+data-utime=["\'](\d+)', webpage, 'timestamp', default=None) or self._search_regex( - r'data-utime=\\\"(\d+)\\\"', tahoe_secondary_data, + r'data-utime=\\\"(\d+)\\\"', tahoe_data.secondary, 'timestamp', default=None)) uploader_id = self._search_regex( r'ownerid:"([\d]+)', webpage, 'uploader_id', default=None) or self._search_regex( - r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_secondary_data, + r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary, 'uploader_id', fatal=False) thumbnail = self._og_search_thumbnail(webpage) @@ -476,11 +444,11 @@ class FacebookIE(InfoExtractor): view_count = parse_count(self._search_regex( r'\bpostViewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', default=None) or self._search_regex( - r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_secondary_data, 'view count', + r'[\'\"]postViewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count', default=None) or self._search_regex( r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count', default=None) or self._search_regex( - r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_secondary_data, 'view count', + r'[\'\"]viewCount[\'\"]\s*:\s*(\d+)', tahoe_data.secondary, 'view count', default=None) ) @@ -523,6 +491,54 @@ class FacebookIE(InfoExtractor): return info_dict +class FacebookTahoeData: + def __init__(self, extractor, page, video_id): + self._page = page + self._video_id = video_id + self._extractor = extractor + self._data = {} + + def _get_data(self, data_type): + if data_type in self._data: + data = self._data[data_type] + else: + req_data, headers = self._get_request_data_and_headers() + data = self._extractor._download_webpage( + self._extractor._VIDEO_PAGE_TAHOE_TEMPLATE % (self._video_id, data_type), self._video_id, + data=req_data, + headers=headers + ) + return '' if not data else data + + @property + def primary(self): + return self._get_data('primary') + + @property + def secondary(self): + return self._get_data('secondary') + + def _get_request_data_and_headers(self): + tahoe_request_data = urlencode_postdata( + { + '__a': 1, + '__pc': self._extractor._search_regex( + r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', self._page, + 'pkg cohort', default='PHASED:DEFAULT'), + '__rev': self._extractor._search_regex( + r'client_revision["\']\s*:\s*(\d+),', self._page, + 'client revision', default='3944515'), + 'fb_dtsg': self._extractor._search_regex( + r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"', + self._page, 'dtsg token', default=''), + }) + tahoe_request_headers = { + 'Content-Type': 'application/x-www-form-urlencoded', + } + + return tahoe_request_data, tahoe_request_headers + + class FacebookPluginsVideoIE(InfoExtractor): _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?Phttps.+)' From a6606f0bf62a33d36b2c26d98609839e46b69b31 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 3 Feb 2019 12:05:08 +0200 Subject: [PATCH 27/43] change version --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6c0866d0f..444fa83a6 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 'vc.2019.02.03' +__version__ = 'vc.2019.02.03.1' From 343c86fa0bff0a4ea22039cc703a9a1a222470c8 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Tue, 2 Apr 2019 16:22:54 +0300 Subject: [PATCH 28/43] Adding is_live to info dictionary of facebook videos --- youtube_dl/extractor/facebook.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 789dd79d5..464ffd47f 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -218,6 +218,7 @@ class FacebookIE(InfoExtractor): 'ext': 'mp4', 'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses', 'uploader': 'ESL One Dota 2', + 'is_live': False }, 'params': { 'skip_download': True, @@ -379,6 +380,8 @@ class FacebookIE(InfoExtractor): if not video_data: raise ExtractorError('Cannot parse data') + is_live = video_data[0].get('is_broadcast', False) and video_data[0].get('is_live_stream', False) + formats = [] for f in video_data: format_id = f['stream_type'] @@ -442,6 +445,7 @@ class FacebookIE(InfoExtractor): 'timestamp': timestamp, 'thumbnail': thumbnail, 'view_count': view_count, + 'is_live': is_live } return webpage, info_dict From c62fd8a4eb70545acb099562bfd4024565da49f3 Mon Sep 17 00:00:00 2001 From: Avi Peretz Date: Sun, 28 Apr 2019 16:48:01 +0300 Subject: [PATCH 29/43] update version --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d4c1278b2..86fc25af9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 'vc.2019.04.24' +__version__ = 'vc.2019.04.28' From 806b59d6148e5dab73b844c95982e4d1ad5c3541 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Sun, 28 Apr 2019 17:05:18 +0300 Subject: [PATCH 30/43] adding is_live to the info json of ok videos --- youtube_dl/extractor/odnoklassniki.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 114b93c07..9d2df2e2d 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -207,6 +207,7 @@ class OdnoklassnikiIE(InfoExtractor): assert title if provider == 'LIVE_TV_APP': info['title'] = self._live_title(title) + info['is_live'] = True quality = qualities(('4', '0', '1', '2', '3', '5')) From 3768a1fde8f8325590fab19c9502fe1efd03cca0 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Sun, 28 Apr 2019 17:05:27 +0300 Subject: [PATCH 31/43] adding test --- youtube_dl/extractor/odnoklassniki.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 9d2df2e2d..95d1ec22d 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -94,6 +94,21 @@ class OdnoklassnikiIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Video has not been found', + },{ + # live video + 'url': 'https://www.ok.ru/video/1050794925929', + 'info_dict': { + 'id': '1050794925929', + 'title': 're:^Поиск репертуара [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'ext': 'mp4', + 'upload_date': u'20190428', + 'uploader': u'(((((КнЯзЬ ))))', + 'uploader_id': u'557343776873', + 'is_live': True + }, + 'params': { + 'skip_download': True, + } }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, From d224fceabca1a391abeadb37d6246428879f1ea1 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Sun, 28 Apr 2019 17:19:19 +0300 Subject: [PATCH 32/43] removing bad is_live code --- youtube_dl/extractor/odnoklassniki.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 44a19e77e..e716da7da 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -94,21 +94,6 @@ class OdnoklassnikiIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Video has not been found', - },{ - # live video - 'url': 'https://www.ok.ru/video/1050794925929', - 'info_dict': { - 'id': '1050794925929', - 'title': 're:^Поиск репертуара [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', - 'ext': 'mp4', - 'upload_date': u'20190428', - 'uploader': u'(((((КнЯзЬ ))))', - 'uploader_id': u'557343776873', - 'is_live': True - }, - 'params': { - 'skip_download': True, - } }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, @@ -261,7 +246,6 @@ class OdnoklassnikiIE(InfoExtractor): assert title if provider == 'LIVE_TV_APP': info['title'] = self._live_title(title) - info['is_live'] = True quality = qualities(('4', '0', '1', '2', '3', '5')) From ed78d69c06e79ba08fe819017a69afa4915d884c Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Sun, 28 Apr 2019 17:43:20 +0300 Subject: [PATCH 33/43] Revert "removing bad is_live code" This reverts commit d224fceabca1a391abeadb37d6246428879f1ea1. --- youtube_dl/extractor/odnoklassniki.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index e716da7da..44a19e77e 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -94,6 +94,21 @@ class OdnoklassnikiIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Video has not been found', + },{ + # live video + 'url': 'https://www.ok.ru/video/1050794925929', + 'info_dict': { + 'id': '1050794925929', + 'title': 're:^Поиск репертуара [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'ext': 'mp4', + 'upload_date': u'20190428', + 'uploader': u'(((((КнЯзЬ ))))', + 'uploader_id': u'557343776873', + 'is_live': True + }, + 'params': { + 'skip_download': True, + } }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, @@ -246,6 +261,7 @@ class OdnoklassnikiIE(InfoExtractor): assert title if provider == 'LIVE_TV_APP': info['title'] = self._live_title(title) + info['is_live'] = True quality = qualities(('4', '0', '1', '2', '3', '5')) From cb4f53ec57d01f226fbf028b2acd2abd32e2d736 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Tue, 30 Apr 2019 16:27:06 +0300 Subject: [PATCH 34/43] updating version --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 86fc25af9..e74e4b60b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 'vc.2019.04.28' +__version__ = 'vc.2019.04.30' From a169965a2a9a25e6ea0962aeb56056894f1670e0 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Sun, 5 May 2019 11:43:45 +0300 Subject: [PATCH 35/43] fixes by flake8 --- youtube_dl/extractor/odnoklassniki.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 44a19e77e..1eb3bdd2f 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -94,7 +94,7 @@ class OdnoklassnikiIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Video has not been found', - },{ + }, { # live video 'url': 'https://www.ok.ru/video/1050794925929', 'info_dict': { @@ -147,7 +147,7 @@ class OdnoklassnikiIE(InfoExtractor): error = self._search_regex( r'
(?P.*?)<\/div>', - webpage, name='error',group='error', default=None) + webpage, name='error', group='error', default=None) if error: raise ExtractorError(error, expected=True) From b02e947a1455737d3bdfdefc2692f5108be269ab Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Sun, 5 May 2019 11:44:38 +0300 Subject: [PATCH 36/43] adding live_status to facebook --- youtube_dl/extractor/facebook.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index cde8eb22c..8e6815189 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -382,7 +382,19 @@ class FacebookIE(InfoExtractor): if not video_data: raise ExtractorError('Cannot parse data') - is_live = video_data[0].get('is_broadcast', False) and video_data[0].get('is_live_stream', False) + is_scheduled = '"isScheduledLive":true' in tahoe_data.secondary + is_live_stream = video_data[0].get('is_live_stream', False) + is_broadcast = video_data[0].get('is_broadcast', False) + + live_status = 'not_live' + if is_broadcast: + live_status = 'completed' + if is_live_stream: + live_status = 'live' + if is_scheduled: + live_status = 'upcoming' + + is_live = live_status == 'live' formats = [] for f in video_data: @@ -468,7 +480,8 @@ class FacebookIE(InfoExtractor): 'thumbnail': thumbnail, 'view_count': view_count, 'uploader_id': uploader_id, - 'is_live': is_live + 'is_live': is_live, + 'live_status': live_status } return webpage, info_dict From ff87a1b642833693b0cfaef82ae20f47585b530d Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Sun, 5 May 2019 14:02:39 +0300 Subject: [PATCH 37/43] updating version --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e74e4b60b..ad3d8ebdc 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = 'vc.2019.04.30' +__version__ = 'vc.2019.05.05' From a875d4d0db798cc015cd6d1812bd34530b50c1e0 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Tue, 11 Jun 2019 11:43:46 +0300 Subject: [PATCH 38/43] just adding a comment to test pull bot --- youtube_dl/extractor/facebook.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 8e6815189..71b987ad3 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -458,6 +458,7 @@ class FacebookIE(InfoExtractor): r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary, 'uploader_id', fatal=False) + # just adding a comment thumbnail = self._og_search_thumbnail(webpage) view_count = parse_count(self._search_regex( From 7cd45984b8baf2ae38df8f2c299196aa32d9376b Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Tue, 11 Jun 2019 12:23:14 +0300 Subject: [PATCH 39/43] adding pull.yml file --- .github/pull.yml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .github/pull.yml diff --git a/.github/pull.yml b/.github/pull.yml new file mode 100644 index 000000000..78960b078 --- /dev/null +++ b/.github/pull.yml @@ -0,0 +1,5 @@ +version: "1" +rules: + - base: master + upstream: aviperes:Fix.25.12.2018 # change `wei` to the owner of upstream repo + mergeMethod: hardreset From 36effdc65370cbb5d94d4ddb1edf135dd3514244 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Tue, 11 Jun 2019 12:28:15 +0300 Subject: [PATCH 40/43] just changing a comment --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 71b987ad3..6d4dbce38 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -458,7 +458,7 @@ class FacebookIE(InfoExtractor): r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary, 'uploader_id', fatal=False) - # just adding a comment + # just changing a comment thumbnail = self._og_search_thumbnail(webpage) view_count = parse_count(self._search_regex( From e2b4f931b3e45165542cfc9dfeb7d367050fc214 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Tue, 11 Jun 2019 12:34:58 +0300 Subject: [PATCH 41/43] Revert "adding pull.yml file" This reverts commit 7cd45984b8baf2ae38df8f2c299196aa32d9376b. --- .github/pull.yml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .github/pull.yml diff --git a/.github/pull.yml b/.github/pull.yml deleted file mode 100644 index 78960b078..000000000 --- a/.github/pull.yml +++ /dev/null @@ -1,5 +0,0 @@ -version: "1" -rules: - - base: master - upstream: aviperes:Fix.25.12.2018 # change `wei` to the owner of upstream repo - mergeMethod: hardreset From e58c09de9b951d7edd2599fcbca23353e4c691e5 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Tue, 11 Jun 2019 12:39:38 +0300 Subject: [PATCH 42/43] changing the commit --- youtube_dl/extractor/facebook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 6d4dbce38..6188f317d 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -458,7 +458,7 @@ class FacebookIE(InfoExtractor): r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary, 'uploader_id', fatal=False) - # just changing a comment + # just changing a comment again thumbnail = self._og_search_thumbnail(webpage) view_count = parse_count(self._search_regex( From d6ad71cb7ae2fbd036246042f0c7696977d51fa3 Mon Sep 17 00:00:00 2001 From: Avichai Cohen Date: Thu, 13 Jun 2019 11:43:01 +0300 Subject: [PATCH 43/43] removing the comment I used to test pull --- youtube_dl/extractor/facebook.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 6188f317d..c4555142e 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -457,8 +457,6 @@ class FacebookIE(InfoExtractor): 'uploader_id', default=None) or self._search_regex( r'[\'\"]ownerid[\'\"]\s*:\s*[\'\"](\d+)[\'\"]', tahoe_data.secondary, 'uploader_id', fatal=False) - - # just changing a comment again thumbnail = self._og_search_thumbnail(webpage) view_count = parse_count(self._search_regex(