l1ving_youtube-dl/youtube_dl/extractor/twitcasting.py

# coding: utf-8
from __future__ import unicode_literals

import re
import itertools

from .common import InfoExtractor
from ..utils import urlencode_postdata


class TwitCastingIE(InfoExtractor):
    """Extractor for a single TwitCasting movie page.

    Matches URLs of the form ``.../<uploader_id>/movie/<id>``.  When the
    ``videopassword`` downloader parameter is set, it is POSTed to the movie
    page as the ``password`` form field.
    """
    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
        'md5': '745243cad58c4681dc752490f7540d7f',
        'info_dict': {
            'id': '2357609',
            'ext': 'mp4',
            'title': 'Live #2357609',
            'uploader_id': 'ivetesangalo',
            'description': "Moi! I'm live on TwitCasting from my iPhone.",
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://twitcasting.tv/mttbernardini/movie/3689740',
        'info_dict': {
            'id': '3689740',
            'ext': 'mp4',
            'title': 'Live playing something #3689740',
            'uploader_id': 'mttbernardini',
            'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)",
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
            'videopassword': 'abc',
        },
    }]

    def _real_extract(self, url):
        """Download the movie page and build the info dict for it.

        Returns a dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader_id`` and the HLS ``formats``.
        """
        uploader_id, video_id = re.match(
            self._VALID_URL, url).group('uploader_id', 'id')

        # Only send a POST body when the user supplied a password.
        password = self._downloader.params.get('videopassword')
        post_data = urlencode_postdata(
            {'password': password}) if password else None
        webpage = self._download_webpage(url, video_id, data=post_data)

        # Prefer the on-page title element; fall back to the twitter card.
        raw_title = self._html_search_regex(
            r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
            webpage, 'title', default=None)
        if not raw_title:
            raw_title = self._html_search_meta(
                'twitter:title', webpage, fatal=True)
        # The title is split across lines with lots of whitespace: turn
        # newlines into spaces, then squeeze every run of spaces into one.
        title = re.sub(r' {2,}', ' ', raw_title.replace('\n', ' '))

        m3u8_patterns = (
            r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            r'(["\'])(?P<url>http.+?\.m3u8.*?)\1',
        )
        playlist_url = self._search_regex(
            m3u8_patterns, webpage, 'm3u8 url', group='url')
        # Undo the escaped slashes ("\/") found in the embedded URL.
        playlist_url = playlist_url.replace('\\/', '/')
        formats = self._extract_m3u8_formats(
            playlist_url, video_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage, default=None)
        if not description:
            description = self._html_search_meta(
                'twitter:description', webpage)

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader_id,
            'formats': formats,
        }
        return info


class TwitCastingHistoryIE(InfoExtractor):
    """Playlist extractor for a TwitCasting user's broadcast history.

    Matches ``.../<uploader_id>/show`` URLs, walks the numbered history
    pages and hands every recorded, non-locked movie to TwitCastingIE.
    """
    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/show'
    _TESTS = [
        {
            'url': 'https://twitcasting.tv/mttbernardini/show/',
            'info_dict': {
                'title': 'Matteo Bernardini',
                'id': 'mttbernardini',
            },
            'playlist_count': 1,
        },
    ]

    def _get_meta_and_entries(self, url):
        """Yield the playlist metadata followed by the playlist entries.

        The first item produced is a ``(title, user_id)`` tuple scraped from
        the first history page; every later item is a ``url_result`` for one
        movie page.  Pages are requested as ``<history url>/<n>`` with ``n``
        starting at 0.

        Raises the extractor's regex-not-found error (via ``_search_regex``)
        when the first page lacks the user name or user id markup.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj:
            # Use only the matched prefix so a trailing slash, page number
            # or query string in the input does not leak into page URLs.
            base_url = mobj.group(0)
            uploader_id = mobj.group('uploader_id')
        else:
            base_url, uploader_id = url.rstrip('/'), None

        seen_hrefs = set()
        for page_num in itertools.count(0):
            webpage = self._download_webpage(
                '%s/%d' % (base_url, page_num), uploader_id,
                # page URLs are 0-based; report them 1-based like the pager
                'Downloading page %d' % (page_num + 1))

            if page_num == 0:
                playlist_title = self._html_search_regex(
                    r'(?s)<[^>]+class=["\']tw-user-nav-name[^>]+>(.+?)</',
                    webpage, 'uploader name')
                user_id = self._search_regex(
                    r'data-user-id="([^"]+)"', webpage, 'user id')
                yield (playlist_title, user_id.strip())
            elif 'class="selected">1</a>' in webpage:
                # The pager marks page 1 as selected although a later page
                # was requested - presumably the site falls back to the
                # first page once the history is exhausted (TODO confirm).
                break

            found_new = False
            for href, inner in re.findall(
                    r'''(?s)<a[^>]+class=["']tw-movie-thumbnail["'][^>]+href="([^"]+)"[^>]*>(.*?)</a>''',
                    webpage):
                if href in seen_hrefs:
                    continue
                seen_hrefs.add(href)
                found_new = True

                # if REC isn't present either a live broadcast or an image
                # e.g. https://twitcasting.tv/marrynontan/movie/506296434
                if 'REC' not in inner:
                    continue

                # skip videos that require a password
                # e.g. https://twitcasting.tv/mttbernardini/movie/3689740
                if 'src="/img/locked.png"' in inner:
                    continue

                # Entries are forced onto TwitCastingIE, which only accepts
                # /movie/<digits> URLs, so anything else is dropped here.
                video_id = self._search_regex(
                    r'/movie/(\d+)', href, 'video id', default=None)
                if not video_id:
                    continue

                video_title = self._html_search_regex(
                    r'''(?s)<[^>]+class=["']tw-movie-thumbnail-title[^>]+>(.+?)</''',
                    inner, 'title', default=None)

                yield self.url_result(
                    'https://twitcasting.tv%s' % href,
                    ie=TwitCastingIE.ie_key(),
                    video_id=video_id, video_title=video_title)

            if not found_new:
                # Safety net against looping forever: a page that adds no
                # movie link we have not already seen ends the listing.
                break

    def _real_extract(self, url):
        """Return a playlist of the user's recorded movies.

        The playlist title and id come from the first item of
        ``_get_meta_and_entries``; the remaining items of that generator are
        passed on as the (lazily evaluated) entries.
        """
        entries = self._get_meta_and_entries(url)
        title, user_id = next(entries)
        return self.playlist_result(
            entries, playlist_title=title, playlist_id=user_id)
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

Update twitcasting extractor and add twitcasting history extractor 2020-04-21 15:21:17 -07:00			`import re`
			`import itertools`

[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00			`from .common import InfoExtractor`
[twitcasting] Add support for private videos (#20843) 2019-04-27 01:17:40 +09:00			`from ..utils import urlencode_postdata`
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00

[twitcasting] Improve extraction and fix issues (closes #17981) 2018-11-03 00:27:36 +07:00			`class TwitCastingIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'`
[twitcasting] Add support for private videos (#20843) 2019-04-27 01:17:40 +09:00			`_TESTS = [{`
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00			`'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',`
			`'md5': '745243cad58c4681dc752490f7540d7f',`
			`'info_dict': {`
			`'id': '2357609',`
			`'ext': 'mp4',`
[twitcasting] Fix test: video title (#20840) 2019-04-26 18:34:23 +09:00			`'title': 'Live #2357609',`
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00			`'uploader_id': 'ivetesangalo',`
			`'description': "Moi! I'm live on TwitCasting from my iPhone.",`
			`'thumbnail': r're:^https?://.*\.jpg$',`
[twitcasting] Improve extraction and fix issues (closes #17981) 2018-11-03 00:27:36 +07:00			`},`
			`'params': {`
			`'skip_download': True,`
			`},`
[twitcasting] Add support for private videos (#20843) 2019-04-27 01:17:40 +09:00			`}, {`
			`'url': 'https://twitcasting.tv/mttbernardini/movie/3689740',`
			`'info_dict': {`
			`'id': '3689740',`
			`'ext': 'mp4',`
			`'title': 'Live playing something #3689740',`
			`'uploader_id': 'mttbernardini',`
			`'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)",`
			`'thumbnail': r're:^https?://.*\.jpg$',`
			`},`
			`'params': {`
			`'skip_download': True,`
			`'videopassword': 'abc',`
			`},`
			`}]`
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00
			`def _real_extract(self, url):`
			`mobj = re.match(self._VALID_URL, url)`
[twitcasting] Improve extraction and fix issues (closes #17981) 2018-11-03 00:27:36 +07:00			`video_id = mobj.group('id')`
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00			`uploader_id = mobj.group('uploader_id')`

[twitcasting] Add support for private videos (#20843) 2019-04-27 01:17:40 +09:00			`video_password = self._downloader.params.get('videopassword')`
			`request_data = None`
			`if video_password:`
			`request_data = urlencode_postdata({`
			`'password': video_password,`
			`})`
			`webpage = self._download_webpage(url, video_id, data=request_data)`
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00
[twitcasting] Improve extraction and fix issues (closes #17981) 2018-11-03 00:27:36 +07:00			`title = self._html_search_regex(`
			`r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',`
			`webpage, 'title', default=None) or self._html_search_meta(`
			`'twitter:title', webpage, fatal=True)`
Update twitcasting extractor and add twitcasting history extractor 2020-04-21 15:21:17 -07:00			`# title is split across lines with lots of whitespace`
			`title = title.replace('\n', ' ')`
			`while ' ' in title:`
			`title = title.replace(' ', ' ')`
[twitcasting] Improve extraction and fix issues (closes #17981) 2018-11-03 00:27:36 +07:00
Update twitcasting extractor 2020-09-18 02:12:20 -07:00			`m3u8_url = self._search_regex(`
			`(r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',`
			`r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),`
			`webpage, 'm3u8 url', group='url')`
			`m3u8_url = m3u8_url.replace('\\/', '/')`
			`formats = self._extract_m3u8_formats(`
			`m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',`
			`m3u8_id='hls')`
[twitcasting] Improve extraction and fix issues (closes #17981) 2018-11-03 00:27:36 +07:00
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00			`thumbnail = self._og_search_thumbnail(webpage)`
[twitcasting] Improve extraction and fix issues (closes #17981) 2018-11-03 00:27:36 +07:00			`description = self._og_search_description(`
			`webpage, default=None) or self._html_search_meta(`
			`'twitter:description', webpage)`

			`return {`
[twitcasting] Add extractor 2018-10-27 03:40:44 +09:00			`'id': video_id,`
			`'title': title,`
			`'description': description,`
			`'thumbnail': thumbnail,`
			`'uploader_id': uploader_id,`
			`'formats': formats,`
			`}`
Update twitcasting extractor and add twitcasting history extractor 2020-04-21 15:21:17 -07:00

			`class TwitCastingHistoryIE(InfoExtractor):`
			`_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/show'`
			`_TESTS = [`
			`{`
			`'url': 'https://twitcasting.tv/mttbernardini/show/',`
			`'info_dict': {`
			`'title': 'Matteo Bernardini',`
			`'id': 'mttbernardini',`
			`},`
			`'playlist_count': 1,`
			`},`
			`]`

			`def _get_meta_and_entries(self, url):`
			`for page_num in itertools.count(0):`
			`page_url = "{}/{}".format(url.rstrip('/'), page_num)`
			`pagenum = None`
			`list_id = None`
			`webpage = self._download_webpage(`
			`page_url, list_id,`
			`'Downloading page %s' % pagenum)`

			`if page_num == 0:`
			`# title = re.search(r'<span class="tw-user-nav-name">(.*)</span>', webpage)`
			`title = re.search(r'(?s)<[^>]+class=["\']tw-user-nav-name[^>]+>(.+?)</', webpage)`
			`title = title.group(1).strip()`
			`user_id = re.search(r'data-user-id="(.*)"', webpage)`
			`user_id = user_id.group(1).strip()`
			`yield (title, user_id)`

			`first_page_selected = webpage.find('class="selected">1</a>') != -1`
			`if page_num != 0 and first_page_selected:`
			`break`

			`matches = re.finditer(r'''<a[^>]+class=["']tw-movie-thumbnail["'][^>]+href="(.+)"[^>]+>((?:\n\|.)*?)</a>''', webpage)`
			`matches = list(matches)`

			`for match in matches:`
			`href = match.group(1)`
			`inner = match.group(2)`
			`# if REC isn't present either a live broadcast or an image`
			`# e.g. https://twitcasting.tv/marrynontan/movie/506296434`
			`if 'REC' not in inner:`
			`continue`

			`# skip videos that require a password`
			`# e.g. https://twitcasting.tv/mttbernardini/movie/3689740`
			`locked = re.search(r'''src="/img/locked.png"''', inner)`
			`if locked is not None:`
			`continue`

			`title = re.search(r'''<[^>]+class=["']tw-movie-thumbnail-title[^>]+>[ \n]?(.+?) ?</''', inner)`
			`if title is not None:`
			`title = title.group(1).strip()`

			`video_url = 'https://twitcasting.tv{}'.format(href)`
			`video_id = href.split('/')[-1]`
			`result = self.url_result(video_url, ie=TwitCastingIE.ie_key(), video_id=video_id, video_title=title)`
			`yield result`

			`def _real_extract(self, url):`
			`entries = self._get_meta_and_entries(url)`

			`(title, user_id) = next(entries)`

			`result = self.playlist_result(entries, playlist_title=title, playlist_id=user_id)`

			`return result`