diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ef803b8a7..8f2fac787 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1214,7 +1214,7 @@ from .tweakers import TweakersIE from .twentyfourvideo import TwentyFourVideoIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE -from .twitcasting import TwitCastingIE +from .twitcasting import TwitCastingIE, TwitCastingHistoryIE from .twitch import ( TwitchVideoIE, TwitchChapterIE, diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dl/extractor/twitcasting.py index 2dbe89f5b..d0bed5b0a 100644 --- a/youtube_dl/extractor/twitcasting.py +++ b/youtube_dl/extractor/twitcasting.py @@ -1,11 +1,12 @@ # coding: utf-8 from __future__ import unicode_literals +import re +import itertools + from .common import InfoExtractor from ..utils import urlencode_postdata -import re - class TwitCastingIE(InfoExtractor): _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P[^/]+)/movie/(?P\d+)' @@ -56,15 +57,25 @@ class TwitCastingIE(InfoExtractor): r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)(?:(?!\1).)+)\1', - r'(["\'])(?Phttp.+?\.m3u8.*?)\1'), - webpage, 'm3u8 url', group='url') - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls') + # m3u8_url = self._search_regex( + # (r'data-movie-url=(["\'])(?P(?:(?!\1).)+)\1', + # r'(["\'])(?Phttp.+?\.m3u8.*?)\1'), + # webpage, 'm3u8 url', group='url') + # m3u8_url = m3u8_url.replace('\\/', '/') + # formats = self._extract_m3u8_formats( + # m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', + # m3u8_id='hls') + formats = [ + { + 'url': "http://dl01.twitcasting.tv/{uploader_id}/download/{video_id}?dl=1".format(uploader_id=uploader_id, video_id=video_id), + 'ext': 'mp4', + } + ] thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description( @@ -79,3 +90,73 @@ class TwitCastingIE(InfoExtractor): 'uploader_id': uploader_id, 'formats': formats, } + + +class TwitCastingHistoryIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P[^/]+)/show' + _TESTS = [ + { + 'url': 'https://twitcasting.tv/mttbernardini/show/', + 'info_dict': { + 'title': 'Matteo Bernardini', + 'id': 'mttbernardini', + }, + 'playlist_count': 1, + }, + ] + + def _get_meta_and_entries(self, url): + for page_num in itertools.count(0): + page_url = "{}/{}".format(url.rstrip('/'), page_num) + pagenum = None + list_id = None + webpage = self._download_webpage( + page_url, list_id, + 'Downloading page %s' % pagenum) + + if page_num == 0: + # title = re.search(r'(.*)', webpage) + title = re.search(r'(?s)<[^>]+class=["\']tw-user-nav-name[^>]+>(.+?)1') != -1 + if page_num != 0 and first_page_selected: + break + + matches = re.finditer(r''']+class=["']tw-movie-thumbnail["'][^>]+href="(.+)"[^>]+>((?:\n|.)*?)''', webpage) + matches = list(matches) + + for match in matches: + href = match.group(1) + inner = match.group(2) + # if REC isn't present either a live broadcast or an image + # e.g. https://twitcasting.tv/marrynontan/movie/506296434 + if 'REC' not in inner: + continue + + # skip videos that require a password + # e.g. https://twitcasting.tv/mttbernardini/movie/3689740 + locked = re.search(r'''src="/img/locked.png"''', inner) + if locked is not None: + continue + + title = re.search(r'''<[^>]+class=["']tw-movie-thumbnail-title[^>]+>[ \n]*?(.+?) *?