Merge branch 'master' into KeezMovies-issue-16134

2018-04-17 12:54:15 -05:00 · 2018-04-17 12:54:15 -05:00 · 9e61f0e191
commit 9e61f0e191
parent d8e7e9aa0a 518d5ba519
15 changed files with 264 additions and 51 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.09*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.09**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.04.16*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.04.16**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.04.09
+[debug] youtube-dl version 2018.04.16
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/ChangeLog
+++ b/ChangeLog
@ -1,3 +1,16 @@
+version 2018.04.16
+
+Extractors
+* [smotri:broadcast] Fix extraction (#16180)
++ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551)
+* [vine:user] Fix extraction (#15514, #16190)
+* [pornhub] Relax URL regular expression (#16165)
+* [cbc:watch] Re-acquire device token when expired (#16160)
++ [fxnetworks] Add support for https theplatform URLs (#16125, #16157)
++ [instagram:user] Add request signing (#16119)
++ [twitch] Add support for mobile URLs (#16146)
+
+
 version 2018.04.09

 Core
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -628,6 +628,8 @@
 - **PhilharmonieDeParis**: Philharmonie de Paris
 - **phoenix.de**
 - **Photobucket**
+ - **Picarto**
+ - **PicartoVod**
 - **Piksel**
 - **Pinkbike**
 - **Pladform**
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@ -232,7 +232,7 @@ class TestNPOSubtitles(BaseTestSubtitles):


 class TestMTVSubtitles(BaseTestSubtitles):
-    url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother'
+    url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans'
    IE = ComedyCentralIE

    def getInfoDict(self):
@ -243,7 +243,7 @@ class TestMTVSubtitles(BaseTestSubtitles):
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(set(subtitles.keys()), set(['en']))
-        self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65')
+        self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961')


 class TestNRKSubtitles(BaseTestSubtitles):
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@ -61,7 +61,7 @@ class TestYoutubeLists(unittest.TestCase):
        dl = FakeYDL()
        dl.params['extract_flat'] = True
        ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
+        result = ie.extract('https://www.youtube.com/playlist?list=PL-KKIb8rvtMSrAO9YFbeM6UQrAqoFTUWv')
        self.assertIsPlaylist(result)
        for entry in result['entries']:
            self.assertTrue(entry.get('title'))
--- a/youtube_dl/extractor/americastestkitchen.py
+++ b/youtube_dl/extractor/americastestkitchen.py
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dl/extractor/cbs.py
@ -65,7 +65,7 @@ class CBSIE(CBSBaseIE):
        last_e = None
        for item in items_data.findall('.//item'):
            asset_type = xpath_text(item, 'assetType')
-            if not asset_type or asset_type in asset_types:
+            if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'):
                continue
            asset_types.append(asset_type)
            query = {
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dl/extractor/cda.py
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@ -815,6 +815,10 @@ from .periscope import (
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .picarto import (
+    PicartoIE,
+    PicartoVodIE,
+)
 from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
 from .pladform import PladformIE
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dl/extractor/instagram.py
@ -6,11 +6,16 @@ import json
 import re

 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_HTTPError,
+)
 from ..utils import (
+    ExtractorError,
    get_element_by_attribute,
    int_or_none,
    lowercase_escape,
+    std_headers,
    try_get,
 )

@ -239,6 +244,8 @@ class InstagramUserIE(InfoExtractor):
        }
    }

+    _gis_tmpl = None
+
    def _entries(self, data):
        def get_count(suffix):
            return int_or_none(try_get(
@ -257,16 +264,36 @@ class InstagramUserIE(InfoExtractor):
                'first': 100,
                'after': cursor,
            })
-            s = '%s:%s:%s' % (rhx_gis, csrf_token, variables)
-            media = self._download_json(
-                'https://www.instagram.com/graphql/query/', uploader_id,
-                'Downloading JSON page %d' % page_num, headers={
-                    'X-Requested-With': 'XMLHttpRequest',
-                    'X-Instagram-GIS': hashlib.md5(s.encode('utf-8')).hexdigest(),
-                }, query={
-                    'query_hash': '472f257a40c653c64c666ce877d59d2b',
-                    'variables': variables,
-                })['data']['user']['edge_owner_to_timeline_media']
+
+            if self._gis_tmpl:
+                gis_tmpls = [self._gis_tmpl]
+            else:
+                gis_tmpls = [
+                    '%s' % rhx_gis,
+                    '',
+                    '%s:%s' % (rhx_gis, csrf_token),
+                    '%s:%s:%s' % (rhx_gis, csrf_token, std_headers['User-Agent']),
+                ]
+
+            for gis_tmpl in gis_tmpls:
+                try:
+                    media = self._download_json(
+                        'https://www.instagram.com/graphql/query/', uploader_id,
+                        'Downloading JSON page %d' % page_num, headers={
+                            'X-Requested-With': 'XMLHttpRequest',
+                            'X-Instagram-GIS': hashlib.md5(
+                                ('%s:%s' % (gis_tmpl, variables)).encode('utf-8')).hexdigest(),
+                        }, query={
+                            'query_hash': '42323d64886122307be10013ad2dcc44',
+                            'variables': variables,
+                        })['data']['user']['edge_owner_to_timeline_media']
+                    self._gis_tmpl = gis_tmpl
+                    break
+                except ExtractorError as e:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                        if gis_tmpl != gis_tmpls[-1]:
+                            continue
+                    raise

            edges = media.get('edges')
            if not edges or not isinstance(edges, list):
--- a/youtube_dl/extractor/joj.py
+++ b/youtube_dl/extractor/joj.py
--- a/youtube_dl/extractor/picarto.py
+++ b/youtube_dl/extractor/picarto.py
@ -0,0 +1,165 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    js_to_json,
+    try_get,
+    update_url_query,
+    urlencode_postdata,
+)
+
+
+class PicartoIE(InfoExtractor):
+    """Extractor for live channel streams on picarto.tv."""
+
+    # The dot after "www" is escaped so that only a literal "www." prefix
+    # is accepted (an unescaped dot would match any character).
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
+    _TEST = {
+        'url': 'https://picarto.tv/Setz',
+        'info_dict': {
+            'id': 'Setz',
+            'ext': 'mp4',
+            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'timestamp': int,
+            'is_live': True
+        },
+        'skip': 'Stream is offline',
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        """Reject URLs handled by PicartoVodIE, otherwise use the base check.
+
+        The channel pattern above also matches the "videopopout" path
+        segment, so VOD URLs have to be excluded explicitly.
+        """
+        return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        """Build the info dict for a live channel.
+
+        Downloads the channel page, parses its ``playerSettings`` object,
+        requests the load balancing info and creates one format per
+        (edge, tech) pair for the HLS and MP4 techs.
+
+        Raises ExtractorError (expected) when the page reports that the
+        channel does not exist or the player settings say it is offline.
+        """
+        channel_id = self._match_id(url)
+        stream_page = self._download_webpage(url, channel_id)
+
+        if '>This channel does not exist' in stream_page:
+            raise ExtractorError(
+                'Channel %s does not exist' % channel_id, expected=True)
+
+        player = self._parse_json(
+            self._search_regex(
+                r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
+                'player settings'),
+            channel_id, transform_source=js_to_json)
+
+        # Only an explicit False counts as offline; a missing key does not.
+        if player.get('online') is False:
+            raise ExtractorError('Stream is offline', expected=True)
+
+        cdn_data = self._download_json(
+            'https://picarto.tv/process/channel', channel_id,
+            data=urlencode_postdata({'loadbalancinginfo': channel_id}),
+            note='Downloading load balancing info')
+
+        def get_event(key):
+            # String value of player['event'][key], or '' when absent
+            return try_get(player, lambda x: x['event'][key], compat_str) or ''
+
+        # Query string appended to every stream URL
+        params = {
+            'token': player.get('token') or '',
+            'ticket': get_event('ticket'),
+            'con': int(time.time() * 1000),
+            # NOTE(review): 'type' is filled from event['ticket'], which
+            # looks like a copy-paste slip; confirm whether event['type']
+            # was intended before changing it.
+            'type': get_event('ticket'),
+            'scope': get_event('scope'),
+        }
+
+        prefered_edge = cdn_data.get('preferedEdge')
+        default_tech = player.get('defaultTech')
+
+        formats = []
+
+        for edge in cdn_data['edges']:
+            edge_ep = edge.get('ep')
+            if not edge_ep or not isinstance(edge_ep, compat_str):
+                continue
+            edge_id = edge.get('id')
+            for tech in cdn_data['techs']:
+                tech_label = tech.get('label')
+                tech_type = tech.get('type')
+                # One point each for the preferred edge and the default tech
+                preference = 0
+                if edge_id == prefered_edge:
+                    preference += 1
+                if tech_type == default_tech:
+                    preference += 1
+                format_id = []
+                if edge_id:
+                    format_id.append(edge_id)
+                if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
+                    format_id.append('hls')
+                    formats.extend(self._extract_m3u8_formats(
+                        update_url_query(
+                            'https://%s/hls/%s/index.m3u8'
+                            % (edge_ep, channel_id), params),
+                        channel_id, 'mp4', preference=preference,
+                        m3u8_id='-'.join(format_id), fatal=False))
+                elif tech_type == 'video/mp4' or tech_label == 'MP4':
+                    format_id.append('mp4')
+                    formats.append({
+                        'url': update_url_query(
+                            'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
+                            params),
+                        'format_id': '-'.join(format_id),
+                        'preference': preference,
+                    })
+                # Any other tech is skipped: rtmp format does not seem to work
+        self._sort_formats(formats)
+
+        # Unknown maturity stays None; otherwise 18 only for an explicit True
+        mature = player.get('mature')
+        if mature is None:
+            age_limit = None
+        else:
+            age_limit = 18 if mature is True else 0
+
+        return {
+            'id': channel_id,
+            'title': self._live_title(channel_id),
+            'is_live': True,
+            'thumbnail': player.get('vodThumb'),
+            'age_limit': age_limit,
+            'formats': formats,
+        }
+
+
+class PicartoVodIE(InfoExtractor):
+    """Extractor for recorded videos served from picarto.tv/videopopout/."""
+
+    # The dot after "www" is escaped so that only a literal "www." prefix
+    # is accepted (an unescaped dot would match any character).
+    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
+        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
+        'info_dict': {
+            'id': 'ArtofZod_2017.12.12.00.13.23.flv',
+            'ext': 'mp4',
+            'title': 'ArtofZod_2017.12.12.00.13.23.flv',
+            'thumbnail': r're:^https?://.*\.jpg'
+        },
+    }, {
+        'url': 'https://picarto.tv/videopopout/Plague',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Build the info dict for a VOD page.
+
+        Parses the settings object passed alongside ``#vod-player`` in the
+        page and extracts HLS formats from its ``vod`` URL. The video id
+        taken from the URL doubles as the title.
+        """
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        # The third argument is the human-readable name used in the
+        # "Unable to extract ..." error, so it must describe the field
+        # rather than repeat the video id.
+        vod_info = self._parse_json(
+            self._search_regex(
+                r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
+                'vod player info'),
+            video_id, transform_source=js_to_json)
+
+        formats = self._extract_m3u8_formats(
+            vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
+            m3u8_id='hls')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': video_id,
+            'thumbnail': vod_info.get('vodThumb'),
+            'formats': formats,
+        }
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dl/extractor/smotri.py
@ -310,6 +310,7 @@ class SmotriBroadcastIE(InfoExtractor):
    IE_DESC = 'Smotri.com broadcasts'
    IE_NAME = 'smotri:broadcast'
    _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*'
+    _NETRC_MACHINE = 'smotri'

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -352,17 +353,18 @@ class SmotriBroadcastIE(InfoExtractor):
            adult_content = False

        ticket = self._html_search_regex(
-            r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'([^']+)'\)",
-            broadcast_page, 'broadcast ticket')
+            (r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1',
+             r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"),
+            broadcast_page, 'broadcast ticket', group='ticket')

-        url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
+        broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket

        broadcast_password = self._downloader.params.get('videopassword')
        if broadcast_password:
-            url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
+            broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()

        broadcast_json_page = self._download_webpage(
-            url, broadcast_id, 'Downloading broadcast JSON')
+            broadcast_url, broadcast_id, 'Downloading broadcast JSON')

        try:
            broadcast_json = json.loads(broadcast_json_page)
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dl/extractor/vine.py
@ -2,9 +2,9 @@
 from __future__ import unicode_literals

 import re
-import itertools

 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
    determine_ext,
    int_or_none,
@ -112,21 +112,24 @@ class VineIE(InfoExtractor):

 class VineUserIE(InfoExtractor):
    IE_NAME = 'vine:user'
-    _VALID_URL = r'(?:https?://)?vine\.co/(?P<u>u/)?(?P<user>[^/]+)/?(\?.*)?$'
+    _VALID_URL = r'https?://vine\.co/(?P<u>u/)?(?P<user>[^/]+)'
    _VINE_BASE_URL = 'https://vine.co/'
-    _TESTS = [
-        {
-            'url': 'https://vine.co/Visa',
-            'info_dict': {
-                'id': 'Visa',
-            },
-            'playlist_mincount': 46,
+    _TESTS = [{
+        'url': 'https://vine.co/itsruthb',
+        'info_dict': {
+            'id': 'itsruthb',
+            'title': 'Ruth B',
+            'description': '| Instagram/Twitter: itsruthb | still a lost boy from neverland',
        },
-        {
-            'url': 'https://vine.co/u/941705360593584128',
-            'only_matching': True,
-        },
-    ]
+        'playlist_mincount': 611,
+    }, {
+        'url': 'https://vine.co/u/942914934646415360',
+        'only_matching': True,
+    }]
+
+    @classmethod
+    def suitable(cls, url):
+        return False if VineIE.suitable(url) else super(VineUserIE, cls).suitable(url)

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
@ -138,17 +141,14 @@ class VineUserIE(InfoExtractor):
        profile_data = self._download_json(
            profile_url, user, note='Downloading user profile data')

-        user_id = profile_data['data']['userId']
-        timeline_data = []
-        for pagenum in itertools.count(1):
-            timeline_url = '%sapi/timelines/users/%s?page=%s&size=100' % (
-                self._VINE_BASE_URL, user_id, pagenum)
-            timeline_page = self._download_json(
-                timeline_url, user, note='Downloading page %d' % pagenum)
-            timeline_data.extend(timeline_page['data']['records'])
-            if timeline_page['data']['nextPage'] is None:
-                break
-
+        data = profile_data['data']
+        user_id = data.get('userId') or data['userIdStr']
+        profile = self._download_json(
+            'https://archive.vine.co/profiles/%s.json' % user_id, user_id)
        entries = [
-            self.url_result(e['permalinkUrl'], 'Vine') for e in timeline_data]
-        return self.playlist_result(entries, user)
+            self.url_result(
+                'https://vine.co/v/%s' % post_id, ie='Vine', video_id=post_id)
+            for post_id in profile['posts']
+            if post_id and isinstance(post_id, compat_str)]
+        return self.playlist_result(
+            entries, user, profile.get('username'), profile.get('description'))
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2018.04.09'
+__version__ = '2018.04.16'