diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 81fe10d54..881475878 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -6,8 +6,8 @@
---
-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.20*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.20**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.10.29**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@@ -35,7 +35,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2017.10.20
+[debug] youtube-dl version 2017.10.29
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/ChangeLog b/ChangeLog
index 547b55981..d33a710fb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+version 2017.10.29
+
+Core
+* [extractor/common] Prefix format id for audio only HLS formats
++ [utils] Add support for zero years and months in parse_duration
+
+Extractors
+* [egghead] Fix extraction (#14388)
++ [fxnetworks] Extract series metadata (#14603)
++ [younow] Add support for younow.com (#9255, #9432, #12436)
+* [dctptv] Fix extraction (#14599)
+* [youtube] Restrict embed regex (#14600)
+* [vimeo] Restrict iframe embed regex (#14600)
+* [soundgasm] Improve extraction (#14588)
+- [myvideo] Remove extractor (#8557)
++ [nbc] Add support for classic-tv videos (#14575)
++ [vrtnu] Add support for cookies authentication and simplify (#11873)
++ [canvas] Add support for vrt.be/vrtnu (#11873)
+* [twitch:clips] Fix title extraction (#14566)
++ [ndtv] Add support for sub-sites (#14534)
+* [dramafever] Fix login error message extraction
++ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt,
+ ro, hu) (#14553)
+
+
version 2017.10.20
Core
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index be5de22df..7b8e7403a 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -498,7 +498,6 @@
- **MySpace:album**
- **MySpass**
- **Myvi**
- - **myvideo** (Currently broken)
- **MyVidster**
- **n-tv.de**
- **natgeo**
@@ -977,6 +976,7 @@
- **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **Vrak**
- **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be
+ - **VrtNU**: VrtNU.be
- **vrv**
- **vrv:series**
- **VShare**
@@ -1035,6 +1035,9 @@
- **YouJizz**
- **youku**: 优酷
- **youku:show**
+ - **YouNowChannel**
+ - **YouNowLive**
+ - **YouNowMoment**
- **YouPorn**
- **YourUpload**
- **youtube**: YouTube.com
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index f18a823fc..686c63efa 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -574,6 +574,32 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
+ def test_parse_f4m_formats(self):
+ _TEST_CASES = [
+ (
+ # https://github.com/rg3/youtube-dl/issues/14660
+ 'custom_base_url',
+ 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+ [{
+ 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m',
+ 'ext': 'flv',
+ 'format_id': '2148',
+ 'protocol': 'f4m',
+ 'tbr': 2148,
+ 'width': 1280,
+ 'height': 720,
+ }]
+ ),
+ ]
+
+ for f4m_file, f4m_url, expected_formats in _TEST_CASES:
+ with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
+ mode='r', encoding='utf-8') as f:
+ formats = self.ie._parse_f4m_formats(
+ compat_etree_fromstring(f.read().encode('utf-8')),
+ f4m_url, None)
+ self.ie._sort_formats(formats)
+ expect_value(self, formats, expected_formats, None)
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index efa73d0f4..cc13f795c 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -540,6 +540,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('87 Min.'), 5220)
self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
+ self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
def test_fix_xml_ampersands(self):
self.assertEqual(
diff --git a/test/testdata/f4m/custom_base_url.f4m b/test/testdata/f4m/custom_base_url.f4m
new file mode 100644
index 000000000..74e1539e8
--- /dev/null
+++ b/test/testdata/f4m/custom_base_url.f4m
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<manifest xmlns="http://ns.adobe.com/f4m/1.0">
+ <streamType>recorded</streamType>
+ <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL>
+ <duration>269.293</duration>
+ <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo>
+ <media url="b_1_2148k" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720">
+  <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata>
+ </media>
+</manifest>
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index c8fde9a89..fdb80f42a 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
media))
-def _add_ns(prop):
- return '{http://ns.adobe.com/f4m/1.0}%s' % prop
+def _add_ns(prop, ver=1):
+ return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+ base_url = xpath_text(
+ manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+ 'base URL', default=None)
+ if base_url:
+ base_url = base_url.strip()
+ return base_url
class F4mFD(FragmentFD):
@@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
- base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
+ # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+ man_base_url = get_base_url(doc) or man_url
+
+ base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
- # From Adobe F4M 3.0 spec:
- # The <baseURL> element SHALL be the base URL for all relative
- # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
- # URLs should be relative to the location of the containing document.
- boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
+ boot_info, bootstrap_url = self._parse_bootstrap_node(
+ bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 20abd06f2..087463dfc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -29,7 +29,10 @@ from ..compat import (
compat_urlparse,
compat_xml_parse_error,
)
-from ..downloader.f4m import remove_encrypted_media
+from ..downloader.f4m import (
+ get_base_url,
+ remove_encrypted_media,
+)
from ..utils import (
NO_DEFAULT,
age_restricted,
@@ -1239,11 +1242,8 @@ class InfoExtractor(object):
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
- base_url = xpath_text(
- manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
- 'base URL', default=None)
- if base_url:
- base_url = base_url.strip()
+
+ manifest_base_url = get_base_url(manifest)
bootstrap_info = xpath_element(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@@ -1275,7 +1275,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
- else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+ else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested
@@ -1310,6 +1310,7 @@ class InfoExtractor(object):
'url': manifest_url,
'manifest_url': manifest_url,
'ext': 'flv' if bootstrap_info is not None else None,
+ 'protocol': 'f4m',
'tbr': tbr,
'width': width,
'height': height,
@@ -1401,7 +1402,7 @@ class InfoExtractor(object):
media_url = media.get('URI')
if media_url:
format_id = []
- for v in (group_id, name):
+ for v in (m3u8_id, group_id, name):
if v:
format_id.append(v)
f = {
@@ -2242,27 +2243,35 @@ class InfoExtractor(object):
return formats
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+ query = compat_urlparse.urlparse(url).query
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
url_base = self._search_regex(
r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
http_base_url = '%s:%s' % ('http', url_base)
formats = []
+
+ def manifest_url(manifest):
+ m_url = '%s/%s' % (http_base_url, manifest)
+ if query:
+ m_url += '?%s' % query
+ return m_url
+
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
- http_base_url + '/playlist.m3u8', video_id, 'mp4',
+ manifest_url('playlist.m3u8'), video_id, 'mp4',
m3u8_entry_protocol, m3u8_id='hls', fatal=False))
if 'f4m' not in skip_protocols:
formats.extend(self._extract_f4m_formats(
- http_base_url + '/manifest.f4m',
+ manifest_url('manifest.f4m'),
video_id, f4m_id='hds', fatal=False))
if 'dash' not in skip_protocols:
formats.extend(self._extract_mpd_formats(
- http_base_url + '/manifest.mpd',
+ manifest_url('manifest.mpd'),
video_id, mpd_id='dash', fatal=False))
if re.search(r'(?:/smil:|\.smil)', url_base):
if 'smil' not in skip_protocols:
rtmp_formats = self._extract_smil_formats(
- http_base_url + '/jwplayer.smil',
+ manifest_url('jwplayer.smil'),
video_id, fatal=False)
for rtmp_format in rtmp_formats:
rtsp_format = rtmp_format.copy()
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
index 00fbbff2f..3a6d0560e 100644
--- a/youtube_dl/extractor/dctp.py
+++ b/youtube_dl/extractor/dctp.py
@@ -2,53 +2,85 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import unified_strdate
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ unified_strdate,
+)
class DctpTvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P<id>.+?)/$'
+ _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
- 'md5': '174dd4a8a6225cf5655952f969cfbe24',
'info_dict': {
'id': '95eaa4f33dad413aa17b4ee613cccc6c',
'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
- 'ext': 'mp4',
+ 'ext': 'flv',
'title': 'Videoinstallation für eine Kaufhausfassade',
'description': 'Kurzfilm',
'upload_date': '20110407',
'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 71.24,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
},
}
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
+ display_id = self._match_id(url)
- object_id = self._html_search_meta('DC.identifier', webpage)
+ webpage = self._download_webpage(url, display_id)
- servers_json = self._download_json(
- 'http://www.dctp.tv/elastic_streaming_client/get_streaming_server/',
- video_id, note='Downloading server list')
- server = servers_json[0]['server']
- m3u8_path = self._search_regex(
- r'\'([^\'"]+/playlist\.m3u8)"', webpage, 'm3u8 path')
- formats = self._extract_m3u8_formats(
- 'http://%s%s' % (server, m3u8_path), video_id, ext='mp4',
- entry_protocol='m3u8_native')
+ video_id = self._html_search_meta(
+ 'DC.identifier', webpage, 'video id',
+ default=None) or self._search_regex(
+ r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
title = self._og_search_title(webpage)
+
+ servers = self._download_json(
+ 'http://www.dctp.tv/streaming_servers/', display_id,
+ note='Downloading server list', fatal=False)
+
+ if servers:
+ endpoint = next(
+ server['endpoint']
+ for server in servers
+ if isinstance(server.get('endpoint'), compat_str) and
+ 'cloudfront' in server['endpoint'])
+ else:
+ endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
+
+ app = self._search_regex(
+ r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
+
+ formats = [{
+ 'url': endpoint,
+ 'app': app,
+ 'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
+ 'page_url': url,
+ 'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
+ 'ext': 'flv',
+ }]
+
description = self._html_search_meta('DC.description', webpage)
upload_date = unified_strdate(
self._html_search_meta('DC.date.created', webpage))
thumbnail = self._og_search_thumbnail(webpage)
+ duration = float_or_none(self._search_regex(
+ r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
+ default=None), scale=1000)
return {
- 'id': object_id,
+ 'id': video_id,
'title': title,
'formats': formats,
- 'display_id': video_id,
+ 'display_id': display_id,
'description': description,
'upload_date': upload_date,
'thumbnail': thumbnail,
+ 'duration': duration,
}
diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py
index e4a3046af..edabaafe6 100644
--- a/youtube_dl/extractor/egghead.py
+++ b/youtube_dl/extractor/egghead.py
@@ -2,7 +2,9 @@
from __future__ import unicode_literals
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
+ determine_ext,
int_or_none,
try_get,
unified_timestamp,
@@ -17,7 +19,7 @@ class EggheadCourseIE(InfoExtractor):
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
- 'id': 'professor-frisby-introduces-composable-functional-javascript',
+ 'id': '72',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
@@ -26,14 +28,28 @@ class EggheadCourseIE(InfoExtractor):
def _real_extract(self, url):
playlist_id = self._match_id(url)
- course = self._download_json(
- 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id)
+ lessons = self._download_json(
+ 'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
+ playlist_id, 'Downloading course lessons JSON')
- entries = [
- self.url_result(
- 'wistia:%s' % lesson['wistia_id'], ie='Wistia',
- video_id=lesson['wistia_id'], video_title=lesson.get('title'))
- for lesson in course['lessons'] if lesson.get('wistia_id')]
+ entries = []
+ for lesson in lessons:
+ lesson_url = lesson.get('http_url')
+ if not lesson_url or not isinstance(lesson_url, compat_str):
+ continue
+ lesson_id = lesson.get('id')
+ if lesson_id:
+ lesson_id = compat_str(lesson_id)
+ entries.append(self.url_result(
+ lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
+
+ course = self._download_json(
+ 'https://egghead.io/api/v1/series/%s' % playlist_id,
+ playlist_id, 'Downloading course JSON', fatal=False) or {}
+
+ playlist_id = course.get('id')
+ if playlist_id:
+ playlist_id = compat_str(playlist_id)
return self.playlist_result(
entries, playlist_id, course.get('title'),
@@ -43,11 +59,12 @@ class EggheadCourseIE(InfoExtractor):
class EggheadLessonIE(InfoExtractor):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
- _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)'
- _TEST = {
+ _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+ _TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
- 'id': 'fv5yotjxcg',
+ 'id': '1196',
+ 'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
'ext': 'mp4',
'title': 'Create linear data flow with container style types (Box)',
'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
@@ -60,25 +77,51 @@ class EggheadLessonIE(InfoExtractor):
},
'params': {
'skip_download': True,
+ 'format': 'bestvideo',
},
- }
+ }, {
+ 'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
- lesson_id = self._match_id(url)
+ display_id = self._match_id(url)
lesson = self._download_json(
- 'https://egghead.io/api/v1/lessons/%s' % lesson_id, lesson_id)
+ 'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+
+ lesson_id = compat_str(lesson['id'])
+ title = lesson['title']
+
+ formats = []
+ for _, format_url in lesson['media_urls'].items():
+ if not format_url or not isinstance(format_url, compat_str):
+ continue
+ ext = determine_ext(format_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, lesson_id, 'mp4', entry_protocol='m3u8',
+ m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, lesson_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+ self._sort_formats(formats)
return {
- '_type': 'url_transparent',
- 'ie_key': 'Wistia',
- 'url': 'wistia:%s' % lesson['wistia_id'],
- 'id': lesson['wistia_id'],
- 'title': lesson.get('title'),
+ 'id': lesson_id,
+ 'display_id': display_id,
+ 'title': title,
'description': lesson.get('summary'),
'thumbnail': lesson.get('thumb_nail'),
'timestamp': unified_timestamp(lesson.get('published_at')),
'duration': int_or_none(lesson.get('duration')),
'view_count': int_or_none(lesson.get('plays_count')),
'tags': try_get(lesson, lambda x: x['tag_list'], list),
+ 'series': try_get(
+ lesson, lambda x: x['series']['title'], compat_str),
+ 'formats': formats,
}
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index faa1f4c16..0e1212c74 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1111,10 +1111,6 @@ from .tvplayer import TVPlayerIE
from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE
-from .twentytwotracks import (
- TwentyTwoTracksIE,
- TwentyTwoTracksGenreIE
-)
from .twitch import (
TwitchVideoIE,
TwitchChapterIE,
@@ -1336,6 +1332,11 @@ from .youku import (
YoukuIE,
YoukuShowIE,
)
+from .younow import (
+ YouNowLiveIE,
+ YouNowChannelIE,
+ YouNowMomentIE,
+)
from .youporn import YouPornIE
from .yourupload import YourUploadIE
from .youtube import (
diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dl/extractor/fxnetworks.py
index 629897317..37549fb01 100644
--- a/youtube_dl/extractor/fxnetworks.py
+++ b/youtube_dl/extractor/fxnetworks.py
@@ -3,27 +3,31 @@ from __future__ import unicode_literals
from .adobepass import AdobePassIE
from ..utils import (
- update_url_query,
extract_attributes,
+ int_or_none,
parse_age_limit,
smuggle_url,
+ update_url_query,
)
class FXNetworksIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.fxnetworks.com/video/719841347694',
- 'md5': '1447d4722e42ebca19e5232ab93abb22',
+ 'url': 'http://www.fxnetworks.com/video/1032565827847',
+ 'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
'info_dict': {
- 'id': '719841347694',
+ 'id': 'dRzwHC_MMqIv',
'ext': 'mp4',
- 'title': 'Vanpage',
- 'description': 'F*ck settling down. You\'re the Worst returns for an all new season August 31st on FXX.',
+ 'title': 'First Look: Better Things - Season 2',
+ 'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
'age_limit': 14,
'uploader': 'NEWA-FNG-FX',
- 'upload_date': '20160706',
- 'timestamp': 1467844741,
+ 'upload_date': '20170825',
+ 'timestamp': 1503686274,
+ 'episode_number': 0,
+ 'season_number': 2,
+ 'series': 'Better Things',
},
'add_ie': ['ThePlatform'],
}, {
@@ -64,6 +68,9 @@ class FXNetworksIE(AdobePassIE):
'id': video_id,
'title': title,
'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}),
+ 'series': video_data.get('data-show-title'),
+ 'episode_number': int_or_none(video_data.get('data-episode')),
+ 'season_number': int_or_none(video_data.get('data-season')),
'thumbnail': video_data.get('data-large-thumb'),
'age_limit': parse_age_limit(rating),
'ie_key': 'ThePlatform',
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 02804d297..6d177cbaf 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -14,7 +14,7 @@ from ..utils import (
class GameSpotIE(OnceIE):
- _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'
+ _VALID_URL = r'https?://(?:www\.)?gamespot\.com/videos/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
@@ -35,6 +35,9 @@ class GameSpotIE(OnceIE):
'params': {
'skip_download': True, # m3u8 downloads
},
+ }, {
+ 'url': 'https://www.gamespot.com/videos/embed/6439218/',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -52,7 +55,7 @@ class GameSpotIE(OnceIE):
manifest_url = f4m_url
formats.extend(self._extract_f4m_formats(
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
- m3u8_url = streams.get('m3u8_stream')
+ m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
if m3u8_url:
manifest_url = m3u8_url
m3u8_formats = self._extract_m3u8_formats(
@@ -60,7 +63,7 @@ class GameSpotIE(OnceIE):
m3u8_id='hls', fatal=False)
formats.extend(m3u8_formats)
progressive_url = dict_get(
- streams, ('progressive_hd', 'progressive_high', 'progressive_low'))
+ streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
if progressive_url and manifest_url:
qualities_basename = self._search_regex(
r'/([^/]+)\.csmil/',
diff --git a/youtube_dl/extractor/skysports.py b/youtube_dl/extractor/skysports.py
index 4ca9f6b3c..efcbb36a9 100644
--- a/youtube_dl/extractor/skysports.py
+++ b/youtube_dl/extractor/skysports.py
@@ -2,7 +2,12 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import strip_or_none
+from ..utils import (
+ extract_attributes,
+ smuggle_url,
+ strip_or_none,
+ urljoin,
+)
class SkySportsIE(InfoExtractor):
@@ -22,12 +27,22 @@ class SkySportsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ video_data = extract_attributes(self._search_regex(
+ r'(<video[^>]+>)', webpage, 'video data'))
+
+ video_url = 'ooyala:%s' % video_data['data-video-id']
+ if video_data.get('data-token-required') == 'true':
+ token_fetch_options = self._parse_json(video_data.get('data-token-fetch-options', '{}'), video_id, fatal=False) or {}
+ token_fetch_url = token_fetch_options.get('url')
+ if token_fetch_url:
+ embed_token = self._download_webpage(urljoin(url, token_fetch_url), video_id, fatal=False)
+ if embed_token:
+ video_url = smuggle_url(video_url, {'embed_token': embed_token.strip('"')})
return {
'_type': 'url_transparent',
'id': video_id,
- 'url': 'ooyala:%s' % self._search_regex(
- r'data-video-id="([^"]+)"', webpage, 'ooyala id'),
+ 'url': video_url,
'title': self._og_search_title(webpage),
'description': strip_or_none(self._og_search_description(webpage)),
'ie_key': 'Ooyala',
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
index 3394c7e6b..2863e53b5 100644
--- a/youtube_dl/extractor/spankbang.py
+++ b/youtube_dl/extractor/spankbang.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..utils import ExtractorError
class SpankBangIE(InfoExtractor):
@@ -33,6 +34,10 @@ class SpankBangIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
stream_key = self._html_search_regex(
r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
webpage, 'stream key')
diff --git a/youtube_dl/extractor/twentytwotracks.py b/youtube_dl/extractor/twentytwotracks.py
deleted file mode 100644
index d6c0ab184..000000000
--- a/youtube_dl/extractor/twentytwotracks.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-# 22Tracks regularly replace the audio tracks that can be streamed on their
-# site. The tracks usually expire after 1 months, so we can't add tests.
-
-
-class TwentyTwoTracksIE(InfoExtractor):
- _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/(?P<id>\d+)'
- IE_NAME = '22tracks:track'
-
- _API_BASE = 'http://22tracks.com/api'
-
- def _extract_info(self, city, genre_name, track_id=None):
- item_id = track_id if track_id else genre_name
-
- cities = self._download_json(
- '%s/cities' % self._API_BASE, item_id,
- 'Downloading cities info',
- 'Unable to download cities info')
- city_id = [x['id'] for x in cities if x['slug'] == city][0]
-
- genres = self._download_json(
- '%s/genres/%s' % (self._API_BASE, city_id), item_id,
- 'Downloading %s genres info' % city,
- 'Unable to download %s genres info' % city)
- genre = [x for x in genres if x['slug'] == genre_name][0]
- genre_id = genre['id']
-
- tracks = self._download_json(
- '%s/tracks/%s' % (self._API_BASE, genre_id), item_id,
- 'Downloading %s genre tracks info' % genre_name,
- 'Unable to download track info')
-
- return [x for x in tracks if x['id'] == item_id][0] if track_id else [genre['title'], tracks]
-
- def _get_track_url(self, filename, track_id):
- token = self._download_json(
- 'http://22tracks.com/token.php?desktop=true&u=/128/%s' % filename,
- track_id, 'Downloading token', 'Unable to download token')
- return 'http://audio.22tracks.com%s?st=%s&e=%d' % (token['filename'], token['st'], token['e'])
-
- def _extract_track_info(self, track_info, track_id):
- download_url = self._get_track_url(track_info['filename'], track_id)
- title = '%s - %s' % (track_info['artist'].strip(), track_info['title'].strip())
- return {
- 'id': track_id,
- 'url': download_url,
- 'ext': 'mp3',
- 'title': title,
- 'duration': int_or_none(track_info.get('duration')),
- 'timestamp': int_or_none(track_info.get('published_at') or track_info.get('created'))
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- city = mobj.group('city')
- genre = mobj.group('genre')
- track_id = mobj.group('id')
-
- track_info = self._extract_info(city, genre, track_id)
- return self._extract_track_info(track_info, track_id)
-
-
-class TwentyTwoTracksGenreIE(TwentyTwoTracksIE):
- _VALID_URL = r'https?://22tracks\.com/(?P<city>[a-z]+)/(?P<genre>[\da-z]+)/?$'
- IE_NAME = '22tracks:genre'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- city = mobj.group('city')
- genre = mobj.group('genre')
-
- genre_title, tracks = self._extract_info(city, genre)
-
- entries = [
- self._extract_track_info(track_info, track_info['id'])
- for track_info in tracks]
-
- return self.playlist_result(entries, genre, genre_title)
diff --git a/youtube_dl/extractor/younow.py b/youtube_dl/extractor/younow.py
new file mode 100644
index 000000000..04dbc87fc
--- /dev/null
+++ b/youtube_dl/extractor/younow.py
@@ -0,0 +1,202 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+)
+
+CDN_API_BASE = 'https://cdn.younow.com/php/api'
+MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
+
+
+class YouNowLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.younow.com/AmandaPadeezy',
+ 'info_dict': {
+ 'id': 'AmandaPadeezy',
+ 'ext': 'mp4',
+ 'is_live': True,
+ 'title': 'March 26, 2017',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'tags': ['girls'],
+ 'categories': ['girls'],
+ 'uploader': 'AmandaPadeezy',
+ 'uploader_id': '6716501',
+ 'uploader_url': 'https://www.younow.com/AmandaPadeezy',
+ 'creator': 'AmandaPadeezy',
+ },
+ 'skip': True,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
+ else super(YouNowLiveIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+
+ data = self._download_json(
+ 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+ % username, username)
+
+ if data.get('errorCode') != 0:
+ raise ExtractorError(data['errorMsg'], expected=True)
+
+ uploader = try_get(
+ data, lambda x: x['user']['profileUrlString'],
+ compat_str) or username
+
+ return {
+ 'id': uploader,
+ 'is_live': True,
+ 'title': self._live_title(uploader),
+ 'thumbnail': data.get('awsUrl'),
+ 'tags': data.get('tags'),
+ 'categories': data.get('tags'),
+ 'uploader': uploader,
+ 'uploader_id': data.get('userId'),
+ 'uploader_url': 'https://www.younow.com/%s' % username,
+ 'creator': uploader,
+ 'view_count': int_or_none(data.get('viewers')),
+ 'like_count': int_or_none(data.get('likes')),
+ 'formats': [{
+ 'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
+ % (CDN_API_BASE, data['broadcastId'], data['userId']),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ }],
+ }
+
+
+def _extract_moment(item, fatal=True):
+ moment_id = item.get('momentId')
+ if not moment_id:
+ if not fatal:
+ return
+ raise ExtractorError('Unable to extract moment id')
+
+ moment_id = compat_str(moment_id)
+
+ title = item.get('text')
+ if not title:
+ title = 'YouNow %s' % (
+ item.get('momentType') or item.get('titleType') or 'moment')
+
+ uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
+ uploader_id = try_get(item, lambda x: x['owner']['userId'])
+ uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
+
+ entry = {
+ 'extractor_key': 'YouNowMoment',
+ 'id': moment_id,
+ 'title': title,
+ 'view_count': int_or_none(item.get('views')),
+ 'like_count': int_or_none(item.get('likes')),
+ 'timestamp': int_or_none(item.get('created')),
+ 'creator': uploader,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ 'formats': [{
+ 'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
+ % (moment_id, moment_id),
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ }],
+ }
+
+ return entry
+
+
+class YouNowChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
+ _TEST = {
+ 'url': 'https://www.younow.com/its_Kateee_/channel',
+ 'info_dict': {
+ 'id': '14629760',
+ 'title': 'its_Kateee_ moments'
+ },
+ 'playlist_mincount': 8,
+ }
+
+ def _entries(self, username, channel_id):
+ created_before = 0
+ for page_num in itertools.count(1):
+ if created_before is None:
+ break
+ info = self._download_json(
+ '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
+ % (CDN_API_BASE, channel_id, created_before), username,
+ note='Downloading moments page %d' % page_num)
+ items = info.get('items')
+ if not items or not isinstance(items, list):
+ break
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ item_type = item.get('type')
+ if item_type == 'moment':
+ entry = _extract_moment(item, fatal=False)
+ if entry:
+ yield entry
+ elif item_type == 'collection':
+ moments = item.get('momentsIds')
+ if isinstance(moments, list):
+ for moment_id in moments:
+ m = self._download_json(
+ MOMENT_URL_FORMAT % moment_id, username,
+ note='Downloading %s moment JSON' % moment_id,
+ fatal=False)
+ if m and isinstance(m, dict) and m.get('item'):
+ entry = _extract_moment(m['item'])
+ if entry:
+ yield entry
+ created_before = int_or_none(item.get('created'))
+
+ def _real_extract(self, url):
+ username = self._match_id(url)
+ channel_id = compat_str(self._download_json(
+ 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
+ % username, username, note='Downloading user information')['userId'])
+ return self.playlist_result(
+ self._entries(username, channel_id), channel_id,
+ '%s moments' % username)
+
+
+class YouNowMomentIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
+ 'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
+ 'info_dict': {
+ 'id': '20712117',
+ 'ext': 'mp4',
+ 'title': 'YouNow capture',
+ 'view_count': int,
+ 'like_count': int,
+ 'timestamp': 1490432040,
+ 'upload_date': '20170325',
+ 'uploader': 'GABO...',
+ 'uploader_id': 35917228,
+ },
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if YouNowChannelIE.suitable(url)
+ else super(YouNowMomentIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
+ return _extract_moment(item['item'])
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 59fb33435..34866a54b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1835,10 +1835,20 @@ def parse_duration(s):
days, hours, mins, secs, ms = m.groups()
else:
m = re.match(
- r'''(?ix)(?:P?T)?
+ r'''(?ix)(?:P?
+ (?:
+ [0-9]+\s*y(?:ears?)?\s*
+ )?
+ (?:
+ [0-9]+\s*m(?:onths?)?\s*
+ )?
+ (?:
+ [0-9]+\s*w(?:eeks?)?\s*
+ )?
(?:
(?P<days>[0-9]+)\s*d(?:ays?)?\s*
)?
+ T)?
(?:
(?P<hours>[0-9]+)\s*h(?:ours?)?\s*
)?
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 4d1686670..43f080bc3 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,3 +1,3 @@
from __future__ import unicode_literals
-__version__ = '2017.10.20'
+__version__ = '2017.10.29'