l1ving_youtube-dl/youtube_dl/extractor/cammodels.py

from __future__ import unicode_literals
from .common import InfoExtractor
from .common import ExtractorError
import json
import re


class CamModelsIE(InfoExtractor):
    """Extractor for live streams of cammodels.com rooms.

    The room page is scanned for a ``manifestUrlRoot=`` value; the JSON
    manifest at ``<root><room id>.json`` is then downloaded and its RTMP
    encodings are turned into formats.
    """

    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)'
    # Sent with both the room page request and the manifest request.
    _HEADERS = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
    }
    # Used only by the fallback parser to pull RTMP(S) links out of a
    # manifest whose layout is not the expected one.
    _RTMP_URL_RE = r'(?P<id>rtmps?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))'

    def _real_extract(self, url):
        """Return the info dict (id, live title, formats) for a room URL."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id, headers=self._HEADERS)
        manifest_url = self._get_manifest_url_from_webpage(video_id=video_id, webpage=webpage)
        manifest = self._get_manifest_from_manifest_url(manifest_url=manifest_url, video_id=video_id, webpage=webpage)
        formats = self._get_formats_from_manifest(manifest=manifest, video_id=video_id)
        return {
            'id': video_id,
            'title': self._live_title(video_id),
            'formats': formats,
            # The title is a live title, so flag the result as live as well.
            'is_live': True,
        }

    def _get_manifest_url_from_webpage(self, video_id, webpage):
        """Build the manifest URL from the ``manifestUrlRoot`` found in webpage.

        Raises ExtractorError when no manifest root is present: an expected
        error if the page shows the offline or private-show notice, an
        unexpected one otherwise.
        """
        # default=None makes a missing match return None silently;
        # fatal=False would also print an "unable to extract" warning for
        # what are normal, handled situations here.
        manifest_url_root = self._html_search_regex(
            pattern=r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))',
            string=webpage,
            name='manifest',
            default=None)
        if manifest_url_root:
            return manifest_url_root + video_id + '.json'

        offline = self._html_search_regex(
            pattern=r'(?P<id>I\'m offline, but let\'s stay connected!)',
            string=webpage,
            name='offline indicator',
            default=None)
        if offline:
            raise ExtractorError(
                'This user is currently offline, so nothing can be downloaded.',
                expected=True,
                video_id=video_id)

        # \u2019 is the typographic apostrophe; written as an escape so the
        # source stays ASCII and does not depend on a coding declaration.
        private = self._html_search_regex(
            pattern='(?P<id>I\u2019m in a private show right now)',
            string=webpage,
            name='private show indicator',
            default=None)
        if private:
            raise ExtractorError(
                'This user is doing a private show, which requires payment. This extractor currently does not support private streams.',
                expected=True,
                video_id=video_id)

        raise ExtractorError(
            'Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.',
            expected=False,
            video_id=video_id)

    def _get_manifest_from_manifest_url(self, manifest_url, video_id, webpage):
        """Download and return the JSON manifest at manifest_url.

        ``webpage`` is accepted for interface compatibility and is not used.
        Raises ExtractorError when the download yields nothing usable.
        """
        manifest = self._download_json(manifest_url, video_id, headers=self._HEADERS, fatal=False)
        if not manifest:
            raise ExtractorError(
                'Link to stream info was found, but we couldn\'t access it. This stream may require login.',
                expected=False,
                video_id=video_id)
        return manifest

    def _get_formats_from_manifest(self, manifest, video_id):
        """Return the sorted list of formats described by manifest.

        The structured ``formats -> mp4-rtmp -> encodings`` list is used when
        present. If the manifest does not have that shape, RTMP links are
        pulled out of its serialized text instead; if that finds nothing
        either, ExtractorError is raised.
        """
        try:
            formats = self._formats_from_encodings(
                manifest['formats']['mp4-rtmp']['encodings'])
        # Only shape mismatches trigger the fallback; anything else
        # (including KeyboardInterrupt) propagates.
        except (AttributeError, IndexError, KeyError, TypeError):
            formats = self._formats_from_rtmp_links(manifest)
            if not formats:
                raise ExtractorError(
                    'Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
                    expected=False,
                    video_id=video_id)
        self._sort_formats(formats)
        return formats

    @staticmethod
    def _formats_from_encodings(encodings):
        """Convert the manifest's encoding entries into format dicts."""
        formats = []
        for encoding in encodings:
            stream_url = encoding.get('location')
            if not stream_url:
                # An entry without a stream URL cannot be downloaded.
                continue
            width = encoding.get('videoWidth')
            formats.append({
                'ext': 'flv',
                'url': stream_url,
                'width': width,
                'height': encoding.get('videoHeight'),
                'vbr': encoding.get('videoKbps'),
                'abr': encoding.get('audioKbps'),
                # '%s' yields text on Python 2 as well (unicode_literals).
                'format_id': '%s' % width if width is not None else None,
            })
        return formats

    def _formats_from_rtmp_links(self, manifest):
        """Scrape RTMP links out of the JSON-serialized manifest."""
        formats = []
        seen_urls = set()
        for link in re.finditer(self._RTMP_URL_RE, json.dumps(manifest)):
            stream_url = link.group('id')
            if stream_url in seen_urls:
                continue
            seen_urls.add(stream_url)
            formats.append({
                'ext': 'flv',
                'url': stream_url,
                # Positional separator: str.split takes no keywords on Python 2.
                'format_id': stream_url.split('/')[-1],
            })
        return formats
-												Initial commit

											
										
										
											2017-10-14 22:09:44 -07:00
+								from __future__ import unicode_literals
 								from .common import InfoExtractor
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								from .common import ExtractorError
 								import json
-												Fix some fallback logic for RTMP link parsing

											
										
										
											2017-10-15 14:26:05 -07:00
+								import re
-												Initial commit

											
										
										
											2017-10-14 22:09:44 -07:00
-												Fix Flake8 style violations

											
										
										
											2017-10-14 22:16:06 -07:00
-												Initial commit

											
										
										
											2017-10-14 22:09:44 -07:00
+								class CamModelsIE(InfoExtractor):
 								    _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>\w+)'
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								    _HEADERS = {
 								        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36'
 								    }
-												Initial commit

											
										
										
											2017-10-14 22:09:44 -07:00
 								    def _real_extract(self, url):
 								        video_id = self._match_id(url)
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								        webpage = self._download_webpage(url_or_request=url, video_id=video_id, headers=self._HEADERS)
 								        manifest_url = self._get_manifest_url_from_webpage(video_id=video_id, webpage=webpage)
 								        manifest = self._get_manifest_from_manifest_url(manifest_url=manifest_url, video_id=video_id, webpage=webpage)
 								        formats = self._get_formats_from_manifest(manifest=manifest, video_id=video_id)
-												Initial commit

											
										
										
											2017-10-14 22:09:44 -07:00
+								        return {
 								            'id': video_id,
 								            'title': self._live_title(video_id),
 								            'formats': formats
-												Fix Flake8 style violations

											
										
										
											2017-10-14 22:16:06 -07:00
+								        }
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
 								    def _get_manifest_url_from_webpage(self, video_id, webpage):
-												Inlined strings that were only used once

											
										
										
											2017-10-15 15:38:29 -07:00
+								        manifest_url_root = self._html_search_regex(
 								            pattern=r'manifestUrlRoot=(?P<id>https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*))',
 								            string=webpage,
 								            name='manifest',
 								            fatal=False)
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								        if not manifest_url_root:
-												Inlined strings that were only used once

											
										
										
											2017-10-15 15:38:29 -07:00
+								            offline = self._html_search_regex(
 								                pattern=r'(?P<id>I\'m offline, but let\'s stay connected!)',
 								                string=webpage,
 								                name='offline indicator',
 								                fatal=False)
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								            if offline:
-												Inlined strings that were only used once

											
										
										
											2017-10-15 15:38:29 -07:00
+								                raise ExtractorError(
 								                    msg='This user is currently offline, so nothing can be downloaded.',
 								                    expected=True,
 								                    video_id=video_id)
 								            private = self._html_search_regex(
 								                pattern=r'(?P<id>I’m in a private show right now)',
 								                string=webpage,
 								                name='private show indicator',
 								                fatal=False)
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								            if private:
-												Inlined strings that were only used once

											
										
										
											2017-10-15 15:38:29 -07:00
+								                raise ExtractorError(
 								                    msg='This user is doing a private show, which requires payment. This extractor currently does not support private streams.',
 								                    expected=True,
 								                    video_id=video_id)
 								            raise ExtractorError(
 								                msg='Unable to find link to stream info on webpage. Room is not offline, so something else is wrong.',
 								                expected=False,
 								                video_id=video_id)
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								        manifest_url = manifest_url_root + video_id + '.json'
 								        return manifest_url
 								    def _get_manifest_from_manifest_url(self, manifest_url, video_id, webpage):
 								        manifest = self._download_json(url_or_request=manifest_url, video_id=video_id, headers=self._HEADERS, fatal=False)
 								        if not manifest:
-												Inlined strings that were only used once

											
										
										
											2017-10-15 15:38:29 -07:00
+								            raise ExtractorError(
 								                msg='Link to stream info was found, but we couldn\'t access it. This stream may require login.',
 								                expected=False,
 								                video_id=video_id)
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								        return manifest
 								    def _get_formats_from_manifest(self, manifest, video_id):
 								        try:
 								            rtmp_formats = manifest['formats']['mp4-rtmp']['encodings']
 								            formats = []
 								            for format in rtmp_formats:
 								                formats.append({
 								                    'ext': 'flv',
 								                    'url': format.get('location'),
 								                    'width': format.get('videoWidth'),
 								                    'height': format.get('videoHeight'),
 								                    'vbr': format.get('videoKbps'),
 								                    'abr': format.get('audioKbps'),
 								                    'format_id': str(format.get('videoWidth'))
 								                })
 								        # If they change the JSON format, then fallback to parsing out RTMP links via regex.
 								        except:
 								            manifest_json = json.dumps(manifest)
-												Inlined strings that were only used once

											
										
										
											2017-10-15 15:38:29 -07:00
+								            manifest_links = re.finditer(
 								                pattern=r'(?P<id>rtmp?:\/\/[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#&//=]*))',
 								                string=manifest_json)
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								            if not manifest_links:
-												Inlined strings that were only used once

											
										
										
											2017-10-15 15:38:29 -07:00
+								                raise ExtractorError(
 								                    msg='Link to stream info was found, but we couldn\'t read the response. This is probably a bug.',
 								                    expected=False,
 								                    video_id=video_id)
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								            formats = []
 								            for manifest_link in manifest_links:
-												Fix some fallback logic for RTMP link parsing

											
										
										
											2017-10-15 14:26:05 -07:00
+								                url = manifest_link.group('id')
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								                formats.append({
 								                    'ext': 'flv',
-												Fix some fallback logic for RTMP link parsing

											
										
										
											2017-10-15 14:26:05 -07:00
+								                    'url': url,
 								                    'format_id': url.split(sep='/')[-1]
-												Remove useless test, add fallback logic to manifest parser, and provide useful errors

											
										
										
											2017-10-15 13:58:37 -07:00
+								                })
 								        self._sort_formats(formats)
 								        return formats