]*>(\d+\s+[a-zA-Z]+)<',
+ webpage)]
+ thumbnail = None
+ duration = None
+
+ self._sort_formats(formats)
+
+ view_count = str_to_int(self._search_regex(
+ r'class=["\']views["\'][^>]*>([\d,.]+)', webpage, 'view count'))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ }
From 8b2dc4c3287e5e90f339af687f3a272818c94fea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Thu, 18 Aug 2016 23:59:13 +0700
Subject: [PATCH 098/218] [options] Remove output template description from
--help
Same reasons as for --format
---
youtube_dl/options.py | 17 +----------------
1 file changed, 1 insertion(+), 16 deletions(-)
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index d32a9e32c..5d62deef4 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -628,22 +628,7 @@ def parseOpts(overrideArguments=None):
filesystem.add_option(
'-o', '--output',
dest='outtmpl', metavar='TEMPLATE',
- help=('Output filename template. Use %(title)s to get the title, '
- '%(uploader)s for the uploader name, %(uploader_id)s for the uploader nickname if different, '
- '%(autonumber)s to get an automatically incremented number, '
- '%(ext)s for the filename extension, '
- '%(format)s for the format description (like "22 - 1280x720" or "HD"), '
- '%(format_id)s for the unique id of the format (like YouTube\'s itags: "137"), '
- '%(upload_date)s for the upload date (YYYYMMDD), '
- '%(extractor)s for the provider (youtube, metacafe, etc), '
- '%(id)s for the video id, '
- '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, '
- '%(playlist_index)s for the position in the playlist. '
- '%(height)s and %(width)s for the width and height of the video format. '
- '%(resolution)s for a textual description of the resolution of the video format. '
- '%% for a literal percent. '
- 'Use - to output to stdout. Can also be used to download to a different directory, '
- 'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .'))
+ help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
filesystem.add_option(
'--autonumber-size',
dest='autonumber_size', metavar='NUMBER',
From 93a63b36f1c52a9981050e393d1876d6162abb49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Fri, 19 Aug 2016 00:13:24 +0700
Subject: [PATCH 099/218] [ChangeLog] Actualize
---
ChangeLog | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/ChangeLog b/ChangeLog
index 354306a97..7e8bb834d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+version
+
+Core
+- Remove output template description from --help
+* Recognize lowercase units in parse_filesize
+
+Extractors
++ [porncom] Add extractor for porn.com (#2251, #10251)
++ [generic] Add support for DBTV embeds
+* [vk:wallpost] Fix audio extraction for new site layout
+* [vk] Fix authentication
++ [hgtvcom:show] Add extractor for hgtv.com shows (#10365)
++ [discoverygo] Add support for another GO network sites
+
+
version 2016.08.17
Core
From bd1bcd3ea079889cfd7cd44c0ea750ac9d432e41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Fri, 19 Aug 2016 00:15:12 +0700
Subject: [PATCH 100/218] release 2016.08.19
---
.github/ISSUE_TEMPLATE.md | 6 +++---
ChangeLog | 2 +-
README.md | 28 ++--------------------------
docs/supportedsites.md | 2 ++
youtube_dl/version.py | 2 +-
5 files changed, 9 insertions(+), 31 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index ae28d83d5..7af3c7099 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.17*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.17**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.08.19*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.08.19**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ $ youtube-dl -v
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2016.08.17
+[debug] youtube-dl version 2016.08.19
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/ChangeLog b/ChangeLog
index 7e8bb834d..e99ffcec6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,4 @@
-version
+version 2016.08.19
Core
- Remove output template description from --help
diff --git a/README.md b/README.md
index cabbbef76..952db7abb 100644
--- a/README.md
+++ b/README.md
@@ -201,32 +201,8 @@ which means you can modify it, redistribute it or use it however you like.
-a, --batch-file FILE File containing URLs to download ('-' for
stdin)
--id Use only video ID in file name
- -o, --output TEMPLATE Output filename template. Use %(title)s to
- get the title, %(uploader)s for the
- uploader name, %(uploader_id)s for the
- uploader nickname if different,
- %(autonumber)s to get an automatically
- incremented number, %(ext)s for the
- filename extension, %(format)s for the
- format description (like "22 - 1280x720" or
- "HD"), %(format_id)s for the unique id of
- the format (like YouTube's itags: "137"),
- %(upload_date)s for the upload date
- (YYYYMMDD), %(extractor)s for the provider
- (youtube, metacafe, etc), %(id)s for the
- video id, %(playlist_title)s,
- %(playlist_id)s, or %(playlist)s (=title if
- present, ID otherwise) for the playlist the
- video is in, %(playlist_index)s for the
- position in the playlist. %(height)s and
- %(width)s for the width and height of the
- video format. %(resolution)s for a textual
- description of the resolution of the video
- format. %% for a literal percent. Use - to
- output to stdout. Can also be used to
- download to a different directory, for
- example with -o '/my/downloads/%(uploader)s
- /%(title)s-%(id)s.%(ext)s' .
+ -o, --output TEMPLATE Output filename template, see the "OUTPUT
+ TEMPLATE" for all the info
--autonumber-size NUMBER Specify the number of digits in
%(autonumber)s when it is present in output
filename template or --auto-number option
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 189b9301d..edf192138 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -279,6 +279,7 @@
- **Helsinki**: helsinki.fi
- **HentaiStigma**
- **HGTV**
+ - **hgtv.com:show**
- **HistoricFilms**
- **history:topic**: History.com Topic
- **hitbox**
@@ -523,6 +524,7 @@
- **podomatic**
- **Pokemon**
- **PolskieRadio**
+ - **PornCom**
- **PornHd**
- **PornHub**: PornHub and Thumbzilla
- **PornHubPlaylist**
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index cf5950117..691f2c591 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2016.08.17'
+__version__ = '2016.08.19'
From 9e5751b9fe72f7425e4cb3f22a56b6a95b59e41d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Fri, 19 Aug 2016 01:13:45 +0700
Subject: [PATCH 101/218] [globo:article] Relax _VALID_URL and video id regex
(Closes #10379)
---
youtube_dl/extractor/globo.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py
index 3de8356f6..dbacbfc61 100644
--- a/youtube_dl/extractor/globo.py
+++ b/youtube_dl/extractor/globo.py
@@ -396,12 +396,12 @@ class GloboIE(InfoExtractor):
class GloboArticleIE(InfoExtractor):
- _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+ _VALID_URL = 'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/]+)(?:\.html)?'
_VIDEOID_REGEXES = [
r'\bdata-video-id=["\'](\d{7,})',
r'\bdata-player-videosids=["\'](\d{7,})',
- r'\bvideosIDs\s*:\s*["\'](\d{7,})',
+ r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
r'\bdata-id=["\'](\d{7,})',
r'<div[^>]+\bid=["\'](\d{7,})',
]
@@ -423,6 +423,9 @@ class GloboArticleIE(InfoExtractor):
}, {
'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
'only_matching': True,
+ }, {
+ 'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
+ 'only_matching': True,
}]
@classmethod
From e4659b45474acb563db0ab4284abdfc80837307e Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Fri, 19 Aug 2016 20:37:17 +0800
Subject: [PATCH 102/218] [utils] Correct octal/hexadecimal number detection in
js_to_json
---
ChangeLog | 6 ++++++
test/test_utils.py | 3 +++
youtube_dl/utils.py | 6 +++---
3 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index e99ffcec6..98a3dbca3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version
+
+Core
+* Fix js_to_json(): correct octal or hexadecimal number detection
+
+
version 2016.08.19
Core
diff --git a/test/test_utils.py b/test/test_utils.py
index cb578cd53..b83da93b4 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -712,6 +712,9 @@ class TestUtil(unittest.TestCase):
inp = '''{"foo":101}'''
self.assertEqual(js_to_json(inp), '''{"foo":101}''')
+ inp = '''{"duration": "00:01:07"}'''
+ self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''')
+
def test_js_to_json_edgecases(self):
on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 35362e767..0c36c1b80 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2038,14 +2038,14 @@ def js_to_json(code):
}.get(m.group(0), m.group(0)), v[1:-1])
INTEGER_TABLE = (
- (r'^0[xX][0-9a-fA-F]+', 16),
- (r'^0+[0-7]+', 8),
+ (r'^(0[xX][0-9a-fA-F]+)\s*:?$', 16),
+ (r'^(0+[0-7]+)\s*:?$', 8),
)
for regex, base in INTEGER_TABLE:
im = re.match(regex, v)
if im:
- i = int(im.group(0), base)
+ i = int(im.group(1), base)
return '"%d":' % i if v.endswith(':') else '%d' % i
return '"%s"' % v
From b82232036a019e340b715779108c3f4caea8a78d Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Fri, 19 Aug 2016 20:39:28 +0800
Subject: [PATCH 103/218] [n-tv.de] Fix extraction (closes #10331)
---
ChangeLog | 3 +++
youtube_dl/extractor/ntvde.py | 8 +++++---
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 98a3dbca3..6281fe325 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -3,6 +3,9 @@ version
Core
* Fix js_to_json(): correct octal or hexadecimal number detection
+Extractors
+* [n-tv.de] Fix extraction (#10331)
+
version 2016.08.19
diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dl/extractor/ntvde.py
index a83e85cb8..d28a81542 100644
--- a/youtube_dl/extractor/ntvde.py
+++ b/youtube_dl/extractor/ntvde.py
@@ -1,6 +1,8 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
@@ -40,8 +42,8 @@ class NTVDeIE(InfoExtractor):
timestamp = int_or_none(info.get('publishedDateAsUnixTimeStamp'))
vdata = self._parse_json(self._search_regex(
r'(?s)\$\(\s*"\#player"\s*\)\s*\.data\(\s*"player",\s*(\{.*?\})\);',
- webpage, 'player data'),
- video_id, transform_source=js_to_json)
+ webpage, 'player data'), video_id,
+ transform_source=lambda s: js_to_json(re.sub(r'advertising:\s*{[^}]+},', '', s)))
duration = parse_duration(vdata.get('duration'))
formats = []
From 55af45fcab4295a92d56180cdbebe7b47e094bc3 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Fri, 19 Aug 2016 23:12:30 +0800
Subject: [PATCH 104/218] [radiobremen] Update _TEST (closes #10337)
---
youtube_dl/extractor/radiobremen.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py
index 0cbb15f08..19a751da0 100644
--- a/youtube_dl/extractor/radiobremen.py
+++ b/youtube_dl/extractor/radiobremen.py
@@ -13,15 +13,15 @@ class RadioBremenIE(InfoExtractor):
IE_NAME = 'radiobremen'
_TEST = {
- 'url': 'http://www.radiobremen.de/mediathek/index.html?id=114720',
+ 'url': 'http://www.radiobremen.de/mediathek/?id=141876',
'info_dict': {
- 'id': '114720',
+ 'id': '141876',
'ext': 'mp4',
- 'duration': 1685,
+ 'duration': 178,
'width': 512,
- 'title': 'buten un binnen vom 22. Dezember',
+ 'title': 'Druck auf Patrick Öztürk',
'thumbnail': 're:https?://.*\.jpg$',
- 'description': 'Unter anderem mit diesen Themen: 45 Flüchtlinge sind in Worpswede angekommen +++ Freies Internet für alle: Bremer arbeiten an einem flächendeckenden W-Lan-Netzwerk +++ Aktivisten kämpfen für das Unibad +++ So war das Wetter 2014 +++',
+ 'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt. Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.',
},
}
From 520251c093f5e0fe6af5e57203a0452aef0682ac Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan
Date: Fri, 19 Aug 2016 23:53:47 +0800
Subject: [PATCH 105/218] [extractor/common] Recognize m3u8 manifests in HTML5
multimedia tags
---
ChangeLog | 1 +
youtube_dl/extractor/common.py | 36 +++++++++++++++++++++++-----------
2 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 6281fe325..450351231 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
version
Core
+* Support m3u8 manifests in HTML5 multimedia tags
* Fix js_to_json(): correct octal or hexadecimal number detection
Extractors
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9427ff449..07d58afe7 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1695,7 +1695,7 @@ class InfoExtractor(object):
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
return formats
- def _parse_html5_media_entries(self, base_url, webpage):
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None):
def absolute_url(video_url):
return compat_urlparse.urljoin(base_url, video_url)
@@ -1710,6 +1710,21 @@ class InfoExtractor(object):
return f
return {}
+ def _media_formats(src, cur_media_type):
+ full_url = absolute_url(src)
+ if determine_ext(full_url) == 'm3u8':
+ is_plain_url = False
+ formats = self._extract_m3u8_formats(
+ full_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=m3u8_id)
+ else:
+ is_plain_url = True
+ formats = [{
+ 'url': full_url,
+ 'vcodec': 'none' if cur_media_type == 'audio' else None,
+ }]
+ return is_plain_url, formats
+
entries = []
for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
media_info = {
@@ -1719,10 +1734,8 @@ class InfoExtractor(object):
media_attributes = extract_attributes(media_tag)
src = media_attributes.get('src')
if src:
- media_info['formats'].append({
- 'url': absolute_url(src),
- 'vcodec': 'none' if media_type == 'audio' else None,
- })
+ _, formats = _media_formats(src, media_type)
+ media_info['formats'].extend(formats)
media_info['thumbnail'] = media_attributes.get('poster')
if media_content:
for source_tag in re.findall(r'<source[^>]+>', media_content):
@@ -1730,12 +1743,13 @@ class InfoExtractor(object):
src = source_attributes.get('src')
if not src:
continue
- f = parse_content_type(source_attributes.get('type'))
- f.update({
- 'url': absolute_url(src),
- 'vcodec': 'none' if media_type == 'audio' else None,
- })
- media_info['formats'].append(f)
+ is_plain_url, formats = _media_formats(src, media_type)
+ if is_plain_url:
+ f = parse_content_type(source_attributes.get('type'))
+ f.update(formats[0])
+ media_info['formats'].append(f)
+ else:
+ media_info['formats'].extend(formats)
for track_tag in re.findall(r'
Reviewed on ([0-9/.]+)
', webpage, 'upload date', - fatal=False)) + fatal=False), day_first=False) return { 'id': video_id, From 52aa7e7476415ec632053f85f9db0919f7bf75c3 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan