diff --git a/test/ci/__init__.py b/test/ci/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/test/ci/test_tiktok.py b/test/ci/test_tiktok.py
new file mode 100644
--- /dev/null
+++ b/test/ci/test_tiktok.py
@@ -0,0 +1,35 @@
+"""Network test: metadata extraction for a public TikTok video."""
+import unittest
+
+import youtube_dl
+
+
+class TikTokTestYoutubeDl(unittest.TestCase):
+    def test_meta_data(self):
+        """Extract one known video and compare its metadata fields."""
+        url = 'https://www.tiktok.com/@oriangaon/video/6807126376001441030'
+        ydl = youtube_dl.YoutubeDL({})
+        info = ydl.extract_info(url, download=False)
+        self.assertEqual(info['id'], '6807126376001441030')
+        self.assertEqual(info['url'], url)
+        self.assertEqual(info['title'], '#foryou #foyou Mmmmm....,,')
+        self.assertEqual(info['uploader'], 'Oriangaon')
+        self.assertEqual(info['timestamp'], 1584907616)
+        self.assertEqual(
+            info['thumbnail'],
+            'https://p16-va-default.akamaized.net/obj/tos-maliva-p-0068/d1a8fbd3e42dda3a1baa01ee9edad289')
+        # Counters change over time, so only a lower bound is asserted.
+        self.assertGreaterEqual(info['view_count'], 79864)
+        self.assertEqual(info['uploader_id'], '6772113344733955077')
+        self.assertFalse(info['is_live'])
+        self.assertEqual(info['live_status'], 'not_live')
+        self.assertGreaterEqual(info['like_count'], 2213)
+        self.assertGreaterEqual(info['share_count'], 109)
+        self.assertGreaterEqual(info['comment_count'], 40)
+        self.assertEqual(info['duration'], 10)
+        self.assertEqual(info['ext'], 'mp4')
+        self.assertGreater(len(info['embed_code']), 0)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dl/extractor/tiktok.py
--- a/youtube_dl/extractor/tiktok.py
+++ b/youtube_dl/extractor/tiktok.py
@@ -69,7 +69,8 @@ class TikTokIE(TikTokBaseIE):
                         https?://
                             (?:
                                 (?:m\.)?tiktok\.com/v|
-                                (?:www\.)?tiktok\.com/share/video
+                                (?:www\.)?tiktok\.com/share/video|
+                                (?:www\.)?tiktok\.com/@[^/]+/video
                             )
                             /(?P<id>\d+)
                     '''
@@ -95,11 +96,77 @@ class TikTokIE(TikTokBaseIE):
 
     def _real_extract(self, url):
+        """Extract metadata for a single TikTok video page.
+
+        Title, author name, thumbnail and embed HTML come from the oEmbed
+        endpoint; timestamp, counters, duration and author id come from the
+        ``__NEXT_DATA__`` JSON embedded in the video page.
+        """
         video_id = self._match_id(url)
-        webpage = self._download_webpage(
-            'https://m.tiktok.com/v/%s.html' % video_id, video_id)
-        data = self._parse_json(self._search_regex(
-            r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data'), video_id)
-        return self._extract_aweme(data)
+        # ``query`` percent-encodes the page URL instead of concatenating it raw.
+        oembed = self._download_json(
+            'https://www.tiktok.com/oembed', video_id,
+            note='Downloading oEmbed JSON', query={'url': url})
+
+        webpage = self._download_webpage(url, video_id)
+        next_data = self._parse_json(self._search_regex(
+            r'<script[^>]+\bid=["\']__NEXT_DATA__["\'][^>]*>([^<]+)</script>',
+            webpage, 'next data'), video_id)
+        # Page metadata is optional: fall back to an empty dict, do not raise.
+        item_info = try_get(
+            next_data,
+            lambda x: x['props']['pageProps']['videoData']['itemInfos'],
+            dict) or {}
+
+        return self.info_dict(
+            video_id,
+            # NOTE(review): this is the page URL, not a direct media URL;
+            # confirm the downloader can fetch the video from it.
+            url,
+            oembed['title'],
+            oembed.get('author_name'),
+            int_or_none(item_info.get('createTime')),
+            url_or_none(oembed.get('thumbnail_url')),
+            int_or_none(item_info.get('playCount')),
+            str_or_none(item_info.get('authorId')),
+            False,
+            'not_live',
+            int_or_none(item_info.get('diggCount')),
+            int_or_none(item_info.get('shareCount')),
+            {},
+            int_or_none(item_info.get('commentCount')),
+            int_or_none(try_get(
+                item_info, lambda x: x['video']['videoMeta']['duration'])),
+            oembed.get('html'))
+
+    def info_dict(self, video_id, url, video_title,
+                  uploader, timestamp, thumbnail,
+                  view_count, uploader_id, is_live, live_status,
+                  likes_count, shares_count, subtitles, comment_count,
+                  duration, embed_code):
+        """Assemble the info dictionary returned for a TikTok video.
+
+        Every argument is stored unchanged under its corresponding key;
+        ``ext`` is always ``'mp4'``.
+        """
+        return {
+            'id': video_id,
+            'url': url,
+            'title': video_title,
+            'uploader': uploader,
+            'timestamp': timestamp,
+            'thumbnail': thumbnail,
+            'view_count': view_count,
+            'uploader_id': uploader_id,
+            'is_live': is_live,
+            'live_status': live_status,
+            'like_count': likes_count,
+            'share_count': shares_count,
+            'subtitles': subtitles,
+            'comment_count': comment_count,
+            'duration': duration,
+            'ext': 'mp4',
+            'embed_code': embed_code,
+        }
 
 
 class TikTokUserIE(TikTokBaseIE):