Merge branch 'master' of https://github.com/rg3/youtube-dl

2016-08-10 13:54:58 +02:00 · 2016-08-10 13:54:58 +02:00 · 6919b10e8d
commit 6919b10e8d
parent 58214ece59 69d8eeeec5
11 changed files with 203 additions and 44 deletions
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@

 ---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.07*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.07**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.10*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.10**

 ### Before submitting an *issue* make sure you have:
 - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
 [debug] User config: []
 [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.08.07
+[debug] youtube-dl version 2016.08.10
 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
 [debug] Proxy map: {}
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -46,7 +46,7 @@ Make sure that someone has not already opened the issue you're trying to open. S

 ###  Why are existing options not enough?

-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#synopsis). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/rg3/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.

 ###  Is there enough context in your bug report?

--- a/ChangeLog
+++ b/ChangeLog
@@ -1,8 +1,29 @@
 version <unreleased>

+Core
++ Recognize more formats in unified_timestamp
+
 Extractors
-* [kuwo:singer] Fix extraction
+* [ctsnews] Fix extraction
+
+
+version 2016.08.10
+
+Core
+* Make --metadata-from-title non fatal when title does not match the pattern
+* Introduce options for randomized sleep before each download
+  --min-sleep-interval and --max-sleep-interval (#9930)
+* Respect default in _search_json_ld
+
+Extractors
++ [uol] Add extractor for uol.com.br (#4263)
+* [rbmaradio] Fix extraction and extract all formats (#10242)
++ [sonyliv] Add extractor for sonyliv.com (#10258)
 * [aparat] Fix extraction
+* [cwtv] Extract HTTP formats
++ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253)
+* [kuwo:singer] Fix extraction
+

 version 2016.08.07

--- a/README.md
+++ b/README.md
@@ -330,7 +330,15 @@ which means you can modify it, redistribute it or use it however you like.
                                     bidirectional text support. Requires bidiv
                                     or fribidi executable in PATH
    --sleep-interval SECONDS         Number of seconds to sleep before each
-                                     download.
+                                     download when used alone or a lower bound
+                                     of a range for randomized sleep before each
+                                     download (minimum possible number of
+                                     seconds to sleep) when used along with
+                                     --max-sleep-interval.
+    --max-sleep-interval SECONDS     Upper bound of a range for randomized sleep
+                                     before each download (maximum possible
+                                     number of seconds to sleep). Must only be
+                                     used along with --min-sleep-interval.

 ## Video Format Options:
    -f, --format FORMAT              Video format code, see the "FORMAT
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -54,17 +54,21 @@ def filter_options(readme):

        if in_options:
            if line.lstrip().startswith('-'):
-                option, description = re.split(r'\s{2,}', line.lstrip())
-                split_option = option.split(' ')
+                split = re.split(r'\s{2,}', line.lstrip())
+                # Description string may start with `-` as well. If there is
+                # only one piece then it's a description bit not an option.
+                if len(split) > 1:
+                    option, description = split
+                    split_option = option.split(' ')

-                if not split_option[-1].startswith('-'):  # metavar
-                    option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
+                    if not split_option[-1].startswith('-'):  # metavar
+                        option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])

-                # Pandoc's definition_lists. See http://pandoc.org/README.html
-                # for more information.
-                ret += '\n%s\n:   %s\n' % (option, description)
-            else:
-                ret += line.lstrip() + '\n'
+                    # Pandoc's definition_lists. See http://pandoc.org/README.html
+                    # for more information.
+                    ret += '\n%s\n:   %s\n' % (option, description)
+                    continue
+            ret += line.lstrip() + '\n'
        else:
            ret += line + '\n'

--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -564,6 +564,7 @@
 - **RoosterTeeth**
 - **RottenTomatoes**
 - **Roxwel**
+ - **Rozhlas**
 - **RTBF**
 - **rte**: Raidió Teilifís Éireann TV
 - **rte:radio**: Raidió Teilifís Éireann radio
@@ -621,6 +622,7 @@
 - **smotri:user**: Smotri.com user videos
 - **Snotr**
 - **Sohu**
+ - **SonyLIV**
 - **soundcloud**
 - **soundcloud:playlist**
 - **soundcloud:search**: Soundcloud search
@@ -747,6 +749,7 @@
 - **udemy:course**
 - **UDNEmbed**: 聯合影音
 - **Unistra**
+ - **uol.com.br**
 - **Urort**: NRK P3 Urørt
 - **URPlay**
 - **USAToday**
--- a/youtube_dl/extractor/ctsnews.py
+++ b/youtube_dl/extractor/ctsnews.py
@@ -1,13 +1,12 @@
-# -*- coding: utf-8 -*-
+# coding: utf-8
 from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import parse_iso8601, ExtractorError
+from ..utils import unified_timestamp


 class CtsNewsIE(InfoExtractor):
    IE_DESC = '華視新聞'
-    # https connection failed (Connection reset)
    _VALID_URL = r'https?://news\.cts\.com\.tw/[a-z]+/[a-z]+/\d+/(?P<id>\d+)\.html'
    _TESTS = [{
        'url': 'http://news.cts.com.tw/cts/international/201501/201501291578109.html',
@@ -16,7 +15,7 @@ class CtsNewsIE(InfoExtractor):
            'id': '201501291578109',
            'ext': 'mp4',
            'title': '以色列.真主黨交火 3人死亡',
-            'description': 'md5:95e9b295c898b7ff294f09d450178d7d',
+            'description': '以色列和黎巴嫩真主黨，爆發五年最嚴重衝突，雙方砲轟交火，兩名以軍死亡，還有一名西班牙籍的聯合國維和人...',
            'timestamp': 1422528540,
            'upload_date': '20150129',
        }
@@ -28,7 +27,7 @@ class CtsNewsIE(InfoExtractor):
            'id': '201309031304098',
            'ext': 'mp4',
            'title': '韓國31歲童顏男 貌如十多歲小孩',
-            'description': 'md5:f183feeba3752b683827aab71adad584',
+            'description': '越有年紀的人，越希望看起來年輕一點，而南韓卻有一位31歲的男子，看起來像是11、12歲的小孩，身...',
            'thumbnail': 're:^https?://.*\.jpg$',
            'timestamp': 1378205880,
            'upload_date': '20130903',
@@ -36,8 +35,7 @@ class CtsNewsIE(InfoExtractor):
    }, {
        # With Youtube embedded video
        'url': 'http://news.cts.com.tw/cts/money/201501/201501291578003.html',
-        'md5': '1d842c771dc94c8c3bca5af2cc1db9c5',
-        'add_ie': ['Youtube'],
+        'md5': 'e4726b2ccd70ba2c319865e28f0a91d1',
        'info_dict': {
            'id': 'OVbfO7d0_hQ',
            'ext': 'mp4',
@@ -47,42 +45,37 @@ class CtsNewsIE(InfoExtractor):
            'upload_date': '20150128',
            'uploader_id': 'TBSCTS',
            'uploader': '中華電視公司',
-        }
+        },
+        'add_ie': ['Youtube'],
    }]

    def _real_extract(self, url):
        news_id = self._match_id(url)
        page = self._download_webpage(url, news_id)

-        if self._search_regex(r'(CTSPlayer2)', page, 'CTSPlayer2 identifier', default=None):
-            feed_url = self._html_search_regex(
-                r'(http://news\.cts\.com\.tw/action/mp4feed\.php\?news_id=\d+)',
-                page, 'feed url')
-            video_url = self._download_webpage(
-                feed_url, news_id, note='Fetching feed')
+        news_id = self._hidden_inputs(page).get('get_id')
+
+        if news_id:
+            mp4_feed = self._download_json(
+                'http://news.cts.com.tw/action/test_mp4feed.php',
+                news_id, note='Fetching feed', query={'news_id': news_id})
+            video_url = mp4_feed['source_url']
        else:
            self.to_screen('Not CTSPlayer video, trying Youtube...')
            youtube_url = self._search_regex(
-                r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url',
-                default=None)
-            if not youtube_url:
-                raise ExtractorError('The news includes no videos!', expected=True)
+                r'src="(//www\.youtube\.com/embed/[^"]+)"', page, 'youtube url')

-            return {
-                '_type': 'url',
-                'url': youtube_url,
-                'ie_key': 'Youtube',
-            }
+            return self.url_result(youtube_url, ie='Youtube')

        description = self._html_search_meta('description', page)
-        title = self._html_search_meta('title', page)
+        title = self._html_search_meta('title', page, fatal=True)
        thumbnail = self._html_search_meta('image', page)

        datetime_str = self._html_search_regex(
-            r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time')
-        # Transform into ISO 8601 format with timezone info
-        datetime_str = datetime_str.replace('/', '-') + ':00+0800'
-        timestamp = parse_iso8601(datetime_str, delimiter=' ')
+            r'(\d{4}/\d{2}/\d{2} \d{2}:\d{2})', page, 'date and time', fatal=False)
+        timestamp = None
+        if datetime_str:
+            timestamp = unified_timestamp(datetime_str) - 8 * 3600

        return {
            'id': news_id,
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -929,6 +929,7 @@ from .udemy import (
 from .udn import UDNEmbedIE
 from .digiteka import DigitekaIE
 from .unistra import UnistraIE
+from .uol import UOLIE
 from .urort import UrortIE
 from .urplay import URPlayIE
 from .usatoday import USATodayIE
--- a/youtube_dl/extractor/uol.py
+++ b/youtube_dl/extractor/uol.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    int_or_none,
+    parse_duration,
+    update_url_query,
+    str_or_none,
+)
+
+
+class UOLIE(InfoExtractor):
+    IE_NAME = 'uol.com.br'
+    _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
+    _TESTS = [{
+        'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
+        'md5': '25291da27dc45e0afb5718a8603d3816',
+        'info_dict': {
+            'id': '15951931',
+            'ext': 'mp4',
+            'title': 'Miss simpatia é encontrada morta',
+            'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
+        }
+    }, {
+        'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+        'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9',
+        'info_dict': {
+            'id': '15954259',
+            'ext': 'mp4',
+            'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
+            'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
+        }
+    }, {
+        'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
+        'only_matching': True,
+    }, {
+        'url': 'http://mais.uol.com.br/view/15954259',
+        'only_matching': True,
+    }, {
+        'url': 'http://noticias.band.uol.com.br/brasilurgente/video/2016/08/05/15951931/miss-simpatia-e-encontrada-morta.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://videos.band.uol.com.br/programa.asp?e=noticias&pr=brasil-urgente&v=15951931&t=Policia-desmonte-base-do-PCC-na-Cracolandia',
+        'only_matching': True,
+    }, {
+        'url': 'http://mais.uol.com.br/view/cphaa0gl2x8r/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+        'only_matching': True,
+    }, {
+        'url': 'http://noticias.uol.com.br//videos/assistir.htm?video=rafaela-silva-inspira-criancas-no-judo-04024D983968D4C95326',
+        'only_matching': True,
+    }, {
+        'url': 'http://mais.uol.com.br/view/e0qbgxid79uv/15275470',
+        'only_matching': True,
+    }]
+
+    _FORMATS = {
+        '2': {
+            'width': 640,
+            'height': 360,
+        },
+        '5': {
+            'width': 1080,
+            'height': 720,
+        },
+        '6': {
+            'width': 426,
+            'height': 240,
+        },
+        '7': {
+            'width': 1920,
+            'height': 1080,
+        },
+        '8': {
+            'width': 192,
+            'height': 144,
+        },
+        '9': {
+            'width': 568,
+            'height': 320,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        if not video_id.isdigit():
+            embed_page = self._download_webpage('https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id, video_id)
+            video_id = self._search_regex(r'mediaId=(\d+)', embed_page, 'media id')
+        video_data = self._download_json(
+            'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % video_id,
+            video_id)['item']
+        title = video_data['title']
+
+        query = {
+            'ver': video_data.get('numRevision', 2),
+            'r': 'http://mais.uol.com.br',
+        }
+        formats = []
+        for f in video_data.get('formats', []):
+            f_url = f.get('url') or f.get('secureUrl')
+            if not f_url:
+                continue
+            format_id = str_or_none(f.get('id'))
+            fmt = {
+                'format_id': format_id,
+                'url': update_url_query(f_url, query),
+            }
+            fmt.update(self._FORMATS.get(format_id, {}))
+            formats.append(fmt)
+        self._sort_formats(formats)
+
+        tags = []
+        for tag in video_data.get('tags', []):
+            tag_description = tag.get('description')
+            if not tag_description:
+                continue
+            tags.append(tag_description)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': clean_html(video_data.get('desMedia')),
+            'thumbnail': video_data.get('thumbnail'),
+            'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')),
+            'tags': tags,
+            'formats': formats,
+        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -122,6 +122,7 @@ DATE_FORMATS = (
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
+    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals

-__version__ = '2016.08.07'
+__version__ = '2016.08.10'