From 67dfbc0cb92a19eda2981528b1456bdc0e3cb805 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Wed, 7 Aug 2013 18:42:40 +0200
Subject: [PATCH 001/215] Added exceptions for the subtitle and video types in
.gitignore
---
.gitignore | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/.gitignore b/.gitignore
index ca4e8f353..fca34b8ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ build/
dist/
MANIFEST
README.txt
+README.md
youtube-dl.1
youtube-dl.bash-completion
youtube-dl
@@ -17,4 +18,10 @@ youtube-dl.tar.gz
.coverage
cover/
updates_key.pem
-*.egg-info
\ No newline at end of file
+*.egg-info
+*.srt
+*.sbv
+*.vtt
+*.flv
+*.mp4
+*.part
From 5898e282726bc2f54fc52fe425c389226e31a797 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Wed, 7 Aug 2013 18:48:24 +0200
Subject: [PATCH 002/215] Fixed small typo issue
---
youtube_dl/YoutubeDL.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index e69d844b8..beed79fd0 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -492,7 +492,8 @@ class YoutubeDL(object):
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat')
- for sub_lang in subtitles.keys():
+
+ for sub_lang in subtitles:
sub = subtitles[sub_lang]
if sub is None:
continue
From 953e32b2c1be077e65bba844010a5a2707af2e2b Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Wed, 7 Aug 2013 18:59:11 +0200
Subject: [PATCH 003/215] [dailymotion] Added support for subtitles + new
InfoExtractor for generic subtitle download.
The idea is that all subtitle downloaders must descend from SubtitlesIE
and implement only three basic methods to achieve the complete subtitle
download functionality. This will allow to reduce the code in YoutubeIE
once it is rewritten.
---
test/test_dailymotion_subtitles.py | 96 +++++++++++++++++++++++++++++
youtube_dl/__init__.py | 10 +--
youtube_dl/extractor/dailymotion.py | 67 ++++++++++++++++++--
youtube_dl/extractor/subtitles.py | 80 ++++++++++++++++++++++++
4 files changed, 242 insertions(+), 11 deletions(-)
create mode 100644 test/test_dailymotion_subtitles.py
create mode 100644 youtube_dl/extractor/subtitles.py
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py
new file mode 100644
index 000000000..f63426a18
--- /dev/null
+++ b/test/test_dailymotion_subtitles.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+
+import sys
+import unittest
+import json
+import io
+import hashlib
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.extractor import DailymotionIE
+from youtube_dl.utils import *
+from helper import FakeYDL
+
+md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+TEST_URL = 'http://www.dailymotion.com/video/xczg00'
+
+class TestDailymotionSubtitles(unittest.TestCase):
+ def setUp(self):
+ DL = FakeYDL()
+ DL.params['allsubtitles'] = False
+ DL.params['writesubtitles'] = False
+ DL.params['subtitlesformat'] = 'srt'
+ DL.params['listsubtitles'] = False
+ def test_no_subtitles(self):
+ DL = FakeYDL()
+ DL.params['writesubtitles'] = False
+ IE = DailymotionIE(DL)
+ info_dict = IE.extract(TEST_URL)
+ subtitles = info_dict[0]['subtitles']
+ self.assertEqual(subtitles, None)
+ def test_subtitles(self):
+ DL = FakeYDL()
+ DL.params['writesubtitles'] = True
+ IE = DailymotionIE(DL)
+ info_dict = IE.extract(TEST_URL)
+ sub = info_dict[0]['subtitles']['en']
+ self.assertEqual(md5(sub), '976553874490cba125086bbfea3ff76f')
+ def test_subtitles_fr(self):
+ DL = FakeYDL()
+ DL.params['writesubtitles'] = True
+ DL.params['subtitleslang'] = 'fr'
+ IE = DailymotionIE(DL)
+ info_dict = IE.extract(TEST_URL)
+ sub = info_dict[0]['subtitles']['fr']
+ self.assertEqual(md5(sub), '594564ec7d588942e384e920e5341792')
+ def test_onlysubtitles(self):
+ DL = FakeYDL()
+ DL.params['writesubtitles'] = True
+ DL.params['onlysubtitles'] = True
+ IE = DailymotionIE(DL)
+ info_dict = IE.extract(TEST_URL)
+ sub = info_dict[0]['subtitles']['en']
+ self.assertEqual(md5(sub), '976553874490cba125086bbfea3ff76f')
+ def test_allsubtitles(self):
+ DL = FakeYDL()
+ DL.params['allsubtitles'] = True
+ IE = DailymotionIE(DL)
+ info_dict = IE.extract(TEST_URL)
+ subtitles = info_dict[0]['subtitles']
+ self.assertEqual(len(subtitles.keys()), 5)
+ # def test_subtitles_sbv_format(self):
+ # DL = FakeYDL()
+ # DL.params['writesubtitles'] = True
+ # DL.params['subtitlesformat'] = 'sbv'
+ # IE = DailymotionIE(DL)
+ # info_dict = IE.extract(TEST_URL)
+ # sub = info_dict[0]['subtitles'][0]
+ # self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
+ # def test_subtitles_vtt_format(self):
+ # DL = FakeYDL()
+ # DL.params['writesubtitles'] = True
+ # DL.params['subtitlesformat'] = 'vtt'
+ # IE = DailymotionIE(DL)
+ # info_dict = IE.extract(TEST_URL)
+ # sub = info_dict[0]['subtitles'][0]
+ # self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
+ def test_list_subtitles(self):
+ DL = FakeYDL()
+ DL.params['listsubtitles'] = True
+ IE = DailymotionIE(DL)
+ info_dict = IE.extract(TEST_URL)
+ self.assertEqual(info_dict, None)
+ def test_automatic_captions(self):
+ DL = FakeYDL()
+ DL.params['writeautomaticsub'] = True
+ DL.params['subtitleslang'] = 'en'
+ IE = DailymotionIE(DL)
+ info_dict = IE.extract(TEST_URL)
+ sub = info_dict[0]['subtitles']
+ self.assertTrue(len(sub) == 0)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index eb23c53a5..c4d595e1c 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -187,22 +187,22 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
video_format.add_option('--write-sub', '--write-srt',
action='store_true', dest='writesubtitles',
- help='write subtitle file (currently youtube only)', default=False)
+ help='write subtitle file', default=False)
video_format.add_option('--write-auto-sub', '--write-automatic-sub',
action='store_true', dest='writeautomaticsub',
- help='write automatic subtitle file (currently youtube only)', default=False)
+ help='write automatic subtitle file (youtube only)', default=False)
video_format.add_option('--only-sub',
action='store_true', dest='skip_download',
help='[deprecated] alias of --skip-download', default=False)
video_format.add_option('--all-subs',
action='store_true', dest='allsubtitles',
- help='downloads all the available subtitles of the video (currently youtube only)', default=False)
+ help='downloads all the available subtitles of the video', default=False)
video_format.add_option('--list-subs',
action='store_true', dest='listsubtitles',
- help='lists all available subtitles for the video (currently youtube only)', default=False)
+ help='lists all available subtitles for the video', default=False)
video_format.add_option('--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT',
- help='subtitle format [srt/sbv/vtt] (default=srt) (currently youtube only)', default='srt')
+ help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
video_format.add_option('--sub-lang', '--srt-lang',
action='store', dest='subtitleslang', metavar='LANG',
help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 9bf7a28ca..eb2322d54 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -1,14 +1,49 @@
import re
import json
+import itertools
+import socket
from .common import InfoExtractor
+from .subtitles import SubtitlesIE
+
from ..utils import (
+ compat_http_client,
+ compat_urllib_error,
compat_urllib_request,
+ compat_str,
+ get_element_by_attribute,
+ get_element_by_id,
ExtractorError,
)
-class DailymotionIE(InfoExtractor):
+
+class DailyMotionSubtitlesIE(SubtitlesIE):
+
+ def _get_available_subtitles(self, video_id):
+ request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
+ try:
+ sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+ return {}
+ info = json.loads(sub_list)
+ if (info['total'] > 0):
+ sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
+ return sub_lang_list
+ self._downloader.report_warning(u'video doesn\'t have subtitles')
+ return {}
+
+ def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
+ sub_lang_list = self._get_available_subtitles(video_id)
+ return sub_lang_list[sub_lang]
+
+ def _request_automatic_caption(self, video_id, webpage):
+ self._downloader.report_warning(u'Automatic Captions not supported by dailymotion')
+ return {}
+
+
+class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
@@ -18,7 +53,7 @@ class DailymotionIE(InfoExtractor):
u'file': u'x33vw9.mp4',
u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
u'info_dict': {
- u"uploader": u"Alex and Van .",
+ u"uploader": u"Alex and Van .",
u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
}
}
@@ -57,17 +92,36 @@ class DailymotionIE(InfoExtractor):
# TODO: support choosing qualities
- for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
- 'stream_h264_hq_url','stream_h264_url',
+ for key in ['stream_h264_hd1080_url', 'stream_h264_hd_url',
+ 'stream_h264_hq_url', 'stream_h264_url',
'stream_h264_ld_url']:
- if info.get(key):#key in info and info[key]:
+ if info.get(key): # key in info and info[key]:
max_quality = key
- self.to_screen(u'Using %s' % key)
+ self.to_screen(u'%s: Using %s' % (video_id, key))
break
else:
raise ExtractorError(u'Unable to extract video URL')
video_url = info[max_quality]
+ # subtitles
+ video_subtitles = None
+ video_webpage = None
+
+ if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+ video_subtitles = self._extract_subtitles(video_id)
+ elif self._downloader.params.get('writeautomaticsub', False):
+ video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+
+ if self._downloader.params.get('listsubtitles', False):
+ self._list_available_subtitles(video_id)
+ return
+
+ if 'length_seconds' not in info:
+ self._downloader.report_warning(u'unable to extract video duration')
+ video_duration = ''
+ else:
+ video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
+
return [{
'id': video_id,
'url': video_url,
@@ -75,5 +129,6 @@ class DailymotionIE(InfoExtractor):
'upload_date': video_upload_date,
'title': self._og_search_title(webpage),
'ext': video_extension,
+ 'subtitles': video_subtitles,
'thumbnail': info['thumbnail_url']
}]
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
new file mode 100644
index 000000000..89864e5d7
--- /dev/null
+++ b/youtube_dl/extractor/subtitles.py
@@ -0,0 +1,80 @@
+import socket
+
+from .common import InfoExtractor
+
+from ..utils import (
+ compat_http_client,
+ compat_urllib_error,
+ compat_urllib_request,
+ compat_str,
+)
+
+
+class SubtitlesIE(InfoExtractor):
+
+ def report_video_subtitles_available(self, video_id, sub_lang_list):
+ """Report available subtitles."""
+ sub_lang = ",".join(list(sub_lang_list.keys()))
+ self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
+
+ def _list_available_subtitles(self, video_id):
+ sub_lang_list = self._get_available_subtitles(video_id)
+ self.report_video_subtitles_available(video_id, sub_lang_list)
+
+ def _extract_subtitles(self, video_id):
+ """
+ Return a dictionary: {language: subtitles} or {} if the subtitles
+ couldn't be found
+ """
+ sub_lang_list = self._get_available_subtitles(video_id)
+ sub_format = self._downloader.params.get('subtitlesformat')
+ if not sub_lang_list: #There was some error, it didn't get the available subtitles
+ return {}
+ if self._downloader.params.get('writesubtitles', False):
+ if self._downloader.params.get('subtitleslang', False):
+ sub_lang = self._downloader.params.get('subtitleslang')
+ elif 'en' in sub_lang_list:
+ sub_lang = 'en'
+ else:
+ sub_lang = list(sub_lang_list.keys())[0]
+ if not sub_lang in sub_lang_list:
+ self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+ return {}
+ sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
+ subtitles = {}
+ for sub_lang in sub_lang_list:
+ subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
+ if subtitle:
+ subtitles[sub_lang] = subtitle
+ return subtitles
+
+ def _request_subtitle(self, sub_lang, sub_name, video_id, format):
+ """ Return the subtitle as a string or None if they are not found """
+ # return (u'Did not fetch video subtitles for %s' % sub_lang, None, None)
+ self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
+ url = self._get_subtitle_url(sub_lang, sub_name, video_id, format)
+ try:
+ sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
+ return
+ if not sub:
+ self._downloader.report_warning(u'Did not fetch video subtitles')
+ return
+ return sub
+
+ def _get_available_subtitles(self, video_id):
+ """Get available subtitles. Redefine in subclasses."""
+ """returns {(lang, url)} """
+ # return {}
+ pass
+
+ def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
+ """returns the url for the given subtitle. Redefine in subclasses."""
+ pass
+
+ def _request_automatic_caption(self, video_id, webpage):
+ """Request automatic caption. Redefine in subclasses."""
+ """returns a tuple of ... """
+ # return [(err_msg, None, None)]
+ pass
From 372297e713c92489c113bf8649ec4aa1d23511f9 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Wed, 7 Aug 2013 21:24:42 +0200
Subject: [PATCH 004/215] Undo the previous commit (it was a mistake)
---
youtube_dl/YoutubeDL.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index beed79fd0..ed5492826 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -493,7 +493,7 @@ class YoutubeDL(object):
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat')
- for sub_lang in subtitles:
+ for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
continue
From 8377574c9cb8740e24d45e9b3d30921fd6ec846c Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Thu, 8 Aug 2013 08:54:10 +0200
Subject: [PATCH 005/215] [internal] Improved subtitle architecture + (update
in youtube/dailymotion)
The structure of subtitles was refined; you only need to implement one
method that returns a dictionary of the available subtitles (lang, url) to
support all the subtitle options on a website. I updated the subtitle
downloaders for youtube/dailymotion to show how it works.
---
youtube_dl/extractor/dailymotion.py | 15 +--
youtube_dl/extractor/subtitles.py | 27 ++---
youtube_dl/extractor/youtube.py | 175 ++++++++++------------------
3 files changed, 73 insertions(+), 144 deletions(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index eb2322d54..97003ee35 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -1,6 +1,5 @@
import re
import json
-import itertools
import socket
from .common import InfoExtractor
@@ -34,16 +33,12 @@ class DailyMotionSubtitlesIE(SubtitlesIE):
self._downloader.report_warning(u'video doesn\'t have subtitles')
return {}
- def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
- sub_lang_list = self._get_available_subtitles(video_id)
- return sub_lang_list[sub_lang]
-
def _request_automatic_caption(self, video_id, webpage):
- self._downloader.report_warning(u'Automatic Captions not supported by dailymotion')
+ self._downloader.report_warning(u'Automatic Captions not supported by this server')
return {}
-class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor):
+class DailymotionIE(DailyMotionSubtitlesIE):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
@@ -116,12 +111,6 @@ class DailymotionIE(DailyMotionSubtitlesIE): #,InfoExtractor):
self._list_available_subtitles(video_id)
return
- if 'length_seconds' not in info:
- self._downloader.report_warning(u'unable to extract video duration')
- video_duration = ''
- else:
- video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])
-
return [{
'id': video_id,
'url': video_url,
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index 89864e5d7..8843e0220 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -15,7 +15,8 @@ class SubtitlesIE(InfoExtractor):
def report_video_subtitles_available(self, video_id, sub_lang_list):
"""Report available subtitles."""
sub_lang = ",".join(list(sub_lang_list.keys()))
- self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
+ self.to_screen(u'%s: Available subtitles for video: %s' %
+ (video_id, sub_lang))
def _list_available_subtitles(self, video_id):
sub_lang_list = self._get_available_subtitles(video_id)
@@ -27,9 +28,9 @@ class SubtitlesIE(InfoExtractor):
couldn't be found
"""
sub_lang_list = self._get_available_subtitles(video_id)
- sub_format = self._downloader.params.get('subtitlesformat')
- if not sub_lang_list: #There was some error, it didn't get the available subtitles
+ if not sub_lang_list: # error, it didn't get the available subtitles
return {}
+
if self._downloader.params.get('writesubtitles', False):
if self._downloader.params.get('subtitleslang', False):
sub_lang = self._downloader.params.get('subtitleslang')
@@ -41,18 +42,15 @@ class SubtitlesIE(InfoExtractor):
self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
return {}
sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
+
subtitles = {}
- for sub_lang in sub_lang_list:
- subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
+ for sub_lang, url in sub_lang_list.iteritems():
+ subtitle = self._request_subtitle_url(sub_lang, url)
if subtitle:
subtitles[sub_lang] = subtitle
return subtitles
- def _request_subtitle(self, sub_lang, sub_name, video_id, format):
- """ Return the subtitle as a string or None if they are not found """
- # return (u'Did not fetch video subtitles for %s' % sub_lang, None, None)
- self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
- url = self._get_subtitle_url(sub_lang, sub_name, video_id, format)
+ def _request_subtitle_url(self, sub_lang, url):
try:
sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -64,13 +62,8 @@ class SubtitlesIE(InfoExtractor):
return sub
def _get_available_subtitles(self, video_id):
- """Get available subtitles. Redefine in subclasses."""
- """returns {(lang, url)} """
- # return {}
- pass
-
- def _get_subtitle_url(self, sub_lang, sub_name, video_id, format):
- """returns the url for the given subtitle. Redefine in subclasses."""
+ """returns the list of available subtitles like this {lang: url} """
+ """or {} if not available. Must be redefined by the subclasses."""
pass
def _request_automatic_caption(self, video_id, webpage):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2b03226f6..414e33b49 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -7,6 +7,7 @@ import socket
import itertools
from .common import InfoExtractor, SearchInfoExtractor
+from .subtitles import SubtitlesIE
from ..utils import (
compat_http_client,
compat_parse_qs,
@@ -24,7 +25,66 @@ from ..utils import (
)
-class YoutubeIE(InfoExtractor):
+class YoutubeSubtitlesIE(SubtitlesIE):
+
+ def _get_available_subtitles(self, video_id):
+ request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+ try:
+ sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+ return {}
+ lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
+
+ sub_lang_list = {}
+ for l in lang_list:
+ lang = l[1]
+ params = compat_urllib_parse.urlencode({
+ 'lang': lang,
+ 'v': video_id,
+ 'fmt': self._downloader.params.get('subtitlesformat'),
+ })
+ url = u'http://www.youtube.com/api/timedtext?' + params
+ sub_lang_list[lang] = url
+ if not sub_lang_list:
+ self._downloader.report_warning(u'video doesn\'t have subtitles')
+ return {}
+ return sub_lang_list
+
+ def _request_automatic_caption(self, video_id, webpage):
+ """We need the webpage for getting the captions url, pass it as an
+ argument to speed up the process."""
+ sub_lang = self._downloader.params.get('subtitleslang') or 'en'
+ sub_format = self._downloader.params.get('subtitlesformat')
+ self.to_screen(u'%s: Looking for automatic captions' % video_id)
+ mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
+ err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+ if mobj is None:
+ self._downloader.report_warning(err_msg)
+ return {}
+ player_config = json.loads(mobj.group(1))
+ try:
+ args = player_config[u'args']
+ caption_url = args[u'ttsurl']
+ timestamp = args[u'timestamp']
+ params = compat_urllib_parse.urlencode({
+ 'lang': 'en',
+ 'tlang': sub_lang,
+ 'fmt': sub_format,
+ 'ts': timestamp,
+ 'kind': 'asr',
+ })
+ subtitles_url = caption_url + '&' + params
+ sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
+ return {sub_lang: sub}
+ # An extractor error can be raise by the download process if there are
+ # no automatic captions but there are subtitles
+ except (KeyError, ExtractorError):
+ self._downloader.report_warning(err_msg)
+ return {}
+
+
+class YoutubeIE(YoutubeSubtitlesIE):
IE_DESC = u'YouTube.com'
_VALID_URL = r"""^
(
@@ -151,19 +211,6 @@ class YoutubeIE(InfoExtractor):
"""Report attempt to download video info webpage."""
self.to_screen(u'%s: Downloading video info webpage' % video_id)
- def report_video_subtitles_download(self, video_id):
- """Report attempt to download video info webpage."""
- self.to_screen(u'%s: Checking available subtitles' % video_id)
-
- def report_video_subtitles_request(self, video_id, sub_lang, format):
- """Report attempt to download video info webpage."""
- self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
-
- def report_video_subtitles_available(self, video_id, sub_lang_list):
- """Report available subtitles."""
- sub_lang = ",".join(list(sub_lang_list.keys()))
- self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
-
def report_information_extraction(self, video_id):
"""Report attempt to extract video information."""
self.to_screen(u'%s: Extracting video information' % video_id)
@@ -203,106 +250,6 @@ class YoutubeIE(InfoExtractor):
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
- def _get_available_subtitles(self, video_id):
- self.report_video_subtitles_download(video_id)
- request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
- try:
- sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
- return {}
- sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
- sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
- if not sub_lang_list:
- self._downloader.report_warning(u'video doesn\'t have subtitles')
- return {}
- return sub_lang_list
-
- def _list_available_subtitles(self, video_id):
- sub_lang_list = self._get_available_subtitles(video_id)
- self.report_video_subtitles_available(video_id, sub_lang_list)
-
- def _request_subtitle(self, sub_lang, sub_name, video_id, format):
- """
- Return the subtitle as a string or None if they are not found
- """
- self.report_video_subtitles_request(video_id, sub_lang, format)
- params = compat_urllib_parse.urlencode({
- 'lang': sub_lang,
- 'name': sub_name,
- 'v': video_id,
- 'fmt': format,
- })
- url = 'http://www.youtube.com/api/timedtext?' + params
- try:
- sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
- return
- if not sub:
- self._downloader.report_warning(u'Did not fetch video subtitles')
- return
- return sub
-
- def _request_automatic_caption(self, video_id, webpage):
- """We need the webpage for getting the captions url, pass it as an
- argument to speed up the process."""
- sub_lang = self._downloader.params.get('subtitleslang') or 'en'
- sub_format = self._downloader.params.get('subtitlesformat')
- self.to_screen(u'%s: Looking for automatic captions' % video_id)
- mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
- err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
- if mobj is None:
- self._downloader.report_warning(err_msg)
- return {}
- player_config = json.loads(mobj.group(1))
- try:
- args = player_config[u'args']
- caption_url = args[u'ttsurl']
- timestamp = args[u'timestamp']
- params = compat_urllib_parse.urlencode({
- 'lang': 'en',
- 'tlang': sub_lang,
- 'fmt': sub_format,
- 'ts': timestamp,
- 'kind': 'asr',
- })
- subtitles_url = caption_url + '&' + params
- sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
- return {sub_lang: sub}
- # An extractor error can be raise by the download process if there are
- # no automatic captions but there are subtitles
- except (KeyError, ExtractorError):
- self._downloader.report_warning(err_msg)
- return {}
-
- def _extract_subtitles(self, video_id):
- """
- Return a dictionary: {language: subtitles} or {} if the subtitles
- couldn't be found
- """
- sub_lang_list = self._get_available_subtitles(video_id)
- sub_format = self._downloader.params.get('subtitlesformat')
- if not sub_lang_list: #There was some error, it didn't get the available subtitles
- return {}
- if self._downloader.params.get('writesubtitles', False):
- if self._downloader.params.get('subtitleslang', False):
- sub_lang = self._downloader.params.get('subtitleslang')
- elif 'en' in sub_lang_list:
- sub_lang = 'en'
- else:
- sub_lang = list(sub_lang_list.keys())[0]
- if not sub_lang in sub_lang_list:
- self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
- return {}
- sub_lang_list = {sub_lang: sub_lang_list[sub_lang]}
- subtitles = {}
- for sub_lang in sub_lang_list:
- subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
- if subtitle:
- subtitles[sub_lang] = subtitle
- return subtitles
-
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
From 505c28aac90fbee46f0d54945b27e115f90785f2 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Thu, 8 Aug 2013 09:53:25 +0200
Subject: [PATCH 006/215] Separated subtitle options in their own group
---
youtube_dl/__init__.py | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index c4d595e1c..8c6abddd9 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -119,6 +119,7 @@ def parseOpts(overrideArguments=None):
selection = optparse.OptionGroup(parser, 'Video Selection')
authentication = optparse.OptionGroup(parser, 'Authentication Options')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
+ subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
downloader = optparse.OptionGroup(parser, 'Download Options')
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
@@ -185,25 +186,26 @@ def parseOpts(overrideArguments=None):
action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
video_format.add_option('-F', '--list-formats',
action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
- video_format.add_option('--write-sub', '--write-srt',
+
+ subtitles.add_option('--write-sub', '--write-srt',
action='store_true', dest='writesubtitles',
help='write subtitle file', default=False)
- video_format.add_option('--write-auto-sub', '--write-automatic-sub',
+ subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
action='store_true', dest='writeautomaticsub',
help='write automatic subtitle file (youtube only)', default=False)
- video_format.add_option('--only-sub',
+ subtitles.add_option('--only-sub',
action='store_true', dest='skip_download',
help='[deprecated] alias of --skip-download', default=False)
- video_format.add_option('--all-subs',
+ subtitles.add_option('--all-subs',
action='store_true', dest='allsubtitles',
help='downloads all the available subtitles of the video', default=False)
- video_format.add_option('--list-subs',
+ subtitles.add_option('--list-subs',
action='store_true', dest='listsubtitles',
help='lists all available subtitles for the video', default=False)
- video_format.add_option('--sub-format',
+ subtitles.add_option('--sub-format',
action='store', dest='subtitlesformat', metavar='FORMAT',
help='subtitle format (default=srt) ([sbv/vtt] youtube only)', default='srt')
- video_format.add_option('--sub-lang', '--srt-lang',
+ subtitles.add_option('--sub-lang', '--srt-lang',
action='store', dest='subtitleslang', metavar='LANG',
help='language of the subtitles to download (optional) use IETF language tags like \'en\'')
@@ -328,6 +330,7 @@ def parseOpts(overrideArguments=None):
parser.add_option_group(filesystem)
parser.add_option_group(verbosity)
parser.add_option_group(video_format)
+ parser.add_option_group(subtitles)
parser.add_option_group(authentication)
parser.add_option_group(postproc)
From 33eb0ce4c4c515b30e5809f63f892b895601b442 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Thu, 8 Aug 2013 10:06:24 +0200
Subject: [PATCH 007/215] [subtitles] removed only-sub option (--skip-download
achieves the same functionality)
---
test/parameters.json | 1 -
test/test_dailymotion_subtitles.py | 8 --------
test/test_youtube_subtitles.py | 8 --------
youtube_dl/__init__.py | 3 ---
4 files changed, 20 deletions(-)
diff --git a/test/parameters.json b/test/parameters.json
index 96998b5c3..f042880ed 100644
--- a/test/parameters.json
+++ b/test/parameters.json
@@ -38,7 +38,6 @@
"writedescription": false,
"writeinfojson": true,
"writesubtitles": false,
- "onlysubtitles": false,
"allsubtitles": false,
"listssubtitles": false
}
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py
index f63426a18..32e3f6abe 100644
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -46,14 +46,6 @@ class TestDailymotionSubtitles(unittest.TestCase):
info_dict = IE.extract(TEST_URL)
sub = info_dict[0]['subtitles']['fr']
self.assertEqual(md5(sub), '594564ec7d588942e384e920e5341792')
- def test_onlysubtitles(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = True
- DL.params['onlysubtitles'] = True
- IE = DailymotionIE(DL)
- info_dict = IE.extract(TEST_URL)
- sub = info_dict[0]['subtitles']['en']
- self.assertEqual(md5(sub), '976553874490cba125086bbfea3ff76f')
def test_allsubtitles(self):
DL = FakeYDL()
DL.params['allsubtitles'] = True
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index fe0eac680..fe5d097ce 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -45,14 +45,6 @@ class TestYoutubeSubtitles(unittest.TestCase):
info_dict = IE.extract('QRS8MkLhQmM')
sub = info_dict[0]['subtitles']['it']
self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
- def test_youtube_onlysubtitles(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = True
- DL.params['onlysubtitles'] = True
- IE = YoutubeIE(DL)
- info_dict = IE.extract('QRS8MkLhQmM')
- sub = info_dict[0]['subtitles']['en']
- self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_allsubtitles(self):
DL = FakeYDL()
DL.params['allsubtitles'] = True
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 8c6abddd9..34f3dad0f 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -193,9 +193,6 @@ def parseOpts(overrideArguments=None):
subtitles.add_option('--write-auto-sub', '--write-automatic-sub',
action='store_true', dest='writeautomaticsub',
help='write automatic subtitle file (youtube only)', default=False)
- subtitles.add_option('--only-sub',
- action='store_true', dest='skip_download',
- help='[deprecated] alias of --skip-download', default=False)
subtitles.add_option('--all-subs',
action='store_true', dest='allsubtitles',
help='downloads all the available subtitles of the video', default=False)
From 447591e1aea39f3100b66a7b94337bf67546663f Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Thu, 8 Aug 2013 11:03:52 +0200
Subject: [PATCH 008/215] [test] Cleaned subtitles tests
---
test/test_dailymotion_subtitles.py | 83 +++++++++-------------------
test/test_youtube_subtitles.py | 88 ++++++++++++------------------
2 files changed, 61 insertions(+), 110 deletions(-)
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py
index 32e3f6abe..26c40493f 100644
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -15,74 +15,43 @@ from youtube_dl.utils import *
from helper import FakeYDL
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
-TEST_URL = 'http://www.dailymotion.com/video/xczg00'
class TestDailymotionSubtitles(unittest.TestCase):
def setUp(self):
- DL = FakeYDL()
- DL.params['allsubtitles'] = False
- DL.params['writesubtitles'] = False
- DL.params['subtitlesformat'] = 'srt'
- DL.params['listsubtitles'] = False
+ self.DL = FakeYDL()
+ self.url = 'http://www.dailymotion.com/video/xczg00'
+ def getInfoDict(self):
+ IE = DailymotionIE(self.DL)
+ info_dict = IE.extract(self.url)
+ return info_dict
+ def getSubtitles(self):
+ info_dict = self.getInfoDict()
+ return info_dict[0]['subtitles']
def test_no_subtitles(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = False
- IE = DailymotionIE(DL)
- info_dict = IE.extract(TEST_URL)
- subtitles = info_dict[0]['subtitles']
+ subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = True
- IE = DailymotionIE(DL)
- info_dict = IE.extract(TEST_URL)
- sub = info_dict[0]['subtitles']['en']
- self.assertEqual(md5(sub), '976553874490cba125086bbfea3ff76f')
+ self.DL.params['writesubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
def test_subtitles_fr(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = True
- DL.params['subtitleslang'] = 'fr'
- IE = DailymotionIE(DL)
- info_dict = IE.extract(TEST_URL)
- sub = info_dict[0]['subtitles']['fr']
- self.assertEqual(md5(sub), '594564ec7d588942e384e920e5341792')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitleslang'] = 'fr'
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
def test_allsubtitles(self):
- DL = FakeYDL()
- DL.params['allsubtitles'] = True
- IE = DailymotionIE(DL)
- info_dict = IE.extract(TEST_URL)
- subtitles = info_dict[0]['subtitles']
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
- # def test_subtitles_sbv_format(self):
- # DL = FakeYDL()
- # DL.params['writesubtitles'] = True
- # DL.params['subtitlesformat'] = 'sbv'
- # IE = DailymotionIE(DL)
- # info_dict = IE.extract(TEST_URL)
- # sub = info_dict[0]['subtitles'][0]
- # self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
- # def test_subtitles_vtt_format(self):
- # DL = FakeYDL()
- # DL.params['writesubtitles'] = True
- # DL.params['subtitlesformat'] = 'vtt'
- # IE = DailymotionIE(DL)
- # info_dict = IE.extract(TEST_URL)
- # sub = info_dict[0]['subtitles'][0]
- # self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
- def test_list_subtitles(self):
- DL = FakeYDL()
- DL.params['listsubtitles'] = True
- IE = DailymotionIE(DL)
- info_dict = IE.extract(TEST_URL)
+ def test_list_subtitles(self): #ojo
+ self.DL.params['listsubtitles'] = True
+ info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_automatic_captions(self):
- DL = FakeYDL()
- DL.params['writeautomaticsub'] = True
- DL.params['subtitleslang'] = 'en'
- IE = DailymotionIE(DL)
- info_dict = IE.extract(TEST_URL)
- sub = info_dict[0]['subtitles']
- self.assertTrue(len(sub) == 0)
+ self.DL.params['writeautomaticsub'] = True
+ self.DL.params['subtitleslang'] = 'en'
+ subtitles = self.getSubtitles()
+ self.assertTrue(len(subtitles.keys()) == 0)
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index fe5d097ce..aa6a1a434 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -18,70 +18,52 @@ md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
class TestYoutubeSubtitles(unittest.TestCase):
def setUp(self):
- DL = FakeYDL()
- DL.params['allsubtitles'] = False
- DL.params['writesubtitles'] = False
- DL.params['subtitlesformat'] = 'srt'
- DL.params['listsubtitles'] = False
+ self.DL = FakeYDL()
+ self.url = 'QRS8MkLhQmM'
+ def getInfoDict(self):
+ IE = YoutubeIE(self.DL)
+ info_dict = IE.extract(self.url)
+ return info_dict
+ def getSubtitles(self):
+ info_dict = self.getInfoDict()
+ return info_dict[0]['subtitles']
def test_youtube_no_subtitles(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = False
- IE = YoutubeIE(DL)
- info_dict = IE.extract('QRS8MkLhQmM')
- subtitles = info_dict[0]['subtitles']
+ self.DL.params['writesubtitles'] = False
+ subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_youtube_subtitles(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = True
- IE = YoutubeIE(DL)
- info_dict = IE.extract('QRS8MkLhQmM')
- sub = info_dict[0]['subtitles']['en']
- self.assertEqual(md5(sub), '4cd9278a35ba2305f47354ee13472260')
+ self.DL.params['writesubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
def test_youtube_subtitles_it(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = True
- DL.params['subtitleslang'] = 'it'
- IE = YoutubeIE(DL)
- info_dict = IE.extract('QRS8MkLhQmM')
- sub = info_dict[0]['subtitles']['it']
- self.assertEqual(md5(sub), '164a51f16f260476a05b50fe4c2f161d')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitleslang'] = 'it'
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_allsubtitles(self):
- DL = FakeYDL()
- DL.params['allsubtitles'] = True
- IE = YoutubeIE(DL)
- info_dict = IE.extract('QRS8MkLhQmM')
- subtitles = info_dict[0]['subtitles']
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
def test_youtube_subtitles_sbv_format(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = True
- DL.params['subtitlesformat'] = 'sbv'
- IE = YoutubeIE(DL)
- info_dict = IE.extract('QRS8MkLhQmM')
- sub = info_dict[0]['subtitles']['en']
- self.assertEqual(md5(sub), '13aeaa0c245a8bed9a451cb643e3ad8b')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'sbv'
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles['en']), '13aeaa0c245a8bed9a451cb643e3ad8b')
def test_youtube_subtitles_vtt_format(self):
- DL = FakeYDL()
- DL.params['writesubtitles'] = True
- DL.params['subtitlesformat'] = 'vtt'
- IE = YoutubeIE(DL)
- info_dict = IE.extract('QRS8MkLhQmM')
- sub = info_dict[0]['subtitles']['en']
- self.assertEqual(md5(sub), '356cdc577fde0c6783b9b822e7206ff7')
+ self.DL.params['writesubtitles'] = True
+ self.DL.params['subtitlesformat'] = 'vtt'
+ subtitles = self.getSubtitles()
+ self.assertEqual(md5(subtitles['en']), '356cdc577fde0c6783b9b822e7206ff7')
def test_youtube_list_subtitles(self):
- DL = FakeYDL()
- DL.params['listsubtitles'] = True
- IE = YoutubeIE(DL)
- info_dict = IE.extract('QRS8MkLhQmM')
+ self.DL.params['listsubtitles'] = True
+ info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
def test_youtube_automatic_captions(self):
- DL = FakeYDL()
- DL.params['writeautomaticsub'] = True
- DL.params['subtitleslang'] = 'it'
- IE = YoutubeIE(DL)
- info_dict = IE.extract('8YoUxe5ncPo')
- sub = info_dict[0]['subtitles']['it']
- self.assertTrue(sub is not None)
+ self.url = '8YoUxe5ncPo'
+ self.DL.params['writeautomaticsub'] = True
+ self.DL.params['subtitleslang'] = 'it'
+ subtitles = self.getSubtitles()
+ self.assertTrue(subtitles['it'] is not None)
if __name__ == '__main__':
unittest.main()
From 69df680b973841b61594c246a9cf4a708f09cb17 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Thu, 8 Aug 2013 11:20:56 +0200
Subject: [PATCH 009/215] [subtitles] Improved docs + new class for servers that
don't support auto-captions
---
youtube_dl/extractor/dailymotion.py | 9 ++------
youtube_dl/extractor/subtitles.py | 32 +++++++++++++++--------------
2 files changed, 19 insertions(+), 22 deletions(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 97003ee35..8fab16005 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -3,7 +3,7 @@ import json
import socket
from .common import InfoExtractor
-from .subtitles import SubtitlesIE
+from .subtitles import NoAutoSubtitlesIE
from ..utils import (
compat_http_client,
@@ -17,7 +17,7 @@ from ..utils import (
)
-class DailyMotionSubtitlesIE(SubtitlesIE):
+class DailyMotionSubtitlesIE(NoAutoSubtitlesIE):
def _get_available_subtitles(self, video_id):
request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
@@ -33,11 +33,6 @@ class DailyMotionSubtitlesIE(SubtitlesIE):
self._downloader.report_warning(u'video doesn\'t have subtitles')
return {}
- def _request_automatic_caption(self, video_id, webpage):
- self._downloader.report_warning(u'Automatic Captions not supported by this server')
- return {}
-
-
class DailymotionIE(DailyMotionSubtitlesIE):
"""Information Extractor for Dailymotion"""
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index 8843e0220..caacea5fe 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -12,21 +12,15 @@ from ..utils import (
class SubtitlesIE(InfoExtractor):
- def report_video_subtitles_available(self, video_id, sub_lang_list):
- """Report available subtitles."""
+ def _list_available_subtitles(self, video_id):
+ """ outputs the available subtitles for the video """
+ sub_lang_list = self._get_available_subtitles(video_id)
sub_lang = ",".join(list(sub_lang_list.keys()))
self.to_screen(u'%s: Available subtitles for video: %s' %
(video_id, sub_lang))
- def _list_available_subtitles(self, video_id):
- sub_lang_list = self._get_available_subtitles(video_id)
- self.report_video_subtitles_available(video_id, sub_lang_list)
-
def _extract_subtitles(self, video_id):
- """
- Return a dictionary: {language: subtitles} or {} if the subtitles
- couldn't be found
- """
+ """ returns {sub_lang: sub} or {} if subtitles not found """
sub_lang_list = self._get_available_subtitles(video_id)
if not sub_lang_list: # error, it didn't get the available subtitles
return {}
@@ -51,6 +45,7 @@ class SubtitlesIE(InfoExtractor):
return subtitles
def _request_subtitle_url(self, sub_lang, url):
+ """ makes the http request for the subtitle """
try:
sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -62,12 +57,19 @@ class SubtitlesIE(InfoExtractor):
return sub
def _get_available_subtitles(self, video_id):
- """returns the list of available subtitles like this {lang: url} """
- """or {} if not available. Must be redefined by the subclasses."""
+ """ returns {sub_lang: url} or {} if not available """
+ """ Must be redefined by the subclasses """
pass
def _request_automatic_caption(self, video_id, webpage):
- """Request automatic caption. Redefine in subclasses."""
- """returns a tuple of ... """
- # return [(err_msg, None, None)]
+ """ returns {sub_lang: sub} or {} if not available """
+ """ Must be redefined by the subclasses """
pass
+
+
+class NoAutoSubtitlesIE(SubtitlesIE):
+ """ A subtitle class for the servers that don't support auto-captions"""
+
+ def _request_automatic_caption(self, video_id, webpage):
+ self._downloader.report_warning(u'Automatic Captions not supported by this server')
+ return {}
From d55de6eec2adf7d1aaca87e75dad06ef15d9be26 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Thu, 8 Aug 2013 18:30:04 +0200
Subject: [PATCH 010/215] [subtitles] Skip subtitles that have already been
downloaded.
Just a check that the file already exists; I also removed a method that
wasn't being used because it was a copy-paste from FileDownloader.
---
youtube_dl/YoutubeDL.py | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ed5492826..e11d6f994 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -221,19 +221,16 @@ class YoutubeDL(object):
def report_writesubtitles(self, sub_filename):
""" Report that the subtitles file is being written """
- self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
+ self.to_screen(u'[info] Writing subtitle: ' + sub_filename)
+
+ def report_existingsubtitles(self, sub_filename):
+ """ Report that the subtitles file has been already written """
+ self.to_screen(u'[info] Skipping existing subtitle: ' + sub_filename)
def report_writeinfojson(self, infofn):
""" Report that the metadata file has been written """
self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
- def report_file_already_downloaded(self, file_name):
- """Report file has already been fully downloaded."""
- try:
- self.to_screen(u'[download] %s has already been downloaded' % file_name)
- except (UnicodeEncodeError) as err:
- self.to_screen(u'[download] The file has already been downloaded')
-
def increment_downloads(self):
"""Increment the ordinal that assigns a number to each file."""
self._num_downloads += 1
@@ -492,13 +489,16 @@ class YoutubeDL(object):
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
sub_format = self.params.get('subtitlesformat')
-
+
for sub_lang in subtitles.keys():
sub = subtitles[sub_lang]
if sub is None:
continue
try:
sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+ if os.path.isfile(encodeFilename(sub_filename)):
+ self.report_existingsubtitles(sub_filename)
+ continue
self.report_writesubtitles(sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
subfile.write(sub)
From d80a064eff4fe2416f9db36b07f1e2ca641f1334 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Thu, 8 Aug 2013 22:22:33 +0200
Subject: [PATCH 011/215] [subtitles] Added tests to check correct behavior
when no subtitles are available
---
test/test_dailymotion_subtitles.py | 9 +++++++--
test/test_youtube_subtitles.py | 10 ++++++++--
2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py
index 26c40493f..efc4e574f 100644
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -27,14 +27,14 @@ class TestDailymotionSubtitles(unittest.TestCase):
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict[0]['subtitles']
- def test_no_subtitles(self):
+ def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f')
- def test_subtitles_fr(self):
+ def test_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslang'] = 'fr'
subtitles = self.getSubtitles()
@@ -52,6 +52,11 @@ class TestDailymotionSubtitles(unittest.TestCase):
self.DL.params['subtitleslang'] = 'en'
subtitles = self.getSubtitles()
self.assertTrue(len(subtitles.keys()) == 0)
+ def test_nosubtitles(self):
+ self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(len(subtitles), 0)
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index aa6a1a434..e40243077 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -27,7 +27,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
def getSubtitles(self):
info_dict = self.getInfoDict()
return info_dict[0]['subtitles']
- def test_youtube_no_subtitles(self):
+ def test_youtube_no_writesubtitles(self):
self.DL.params['writesubtitles'] = False
subtitles = self.getSubtitles()
self.assertEqual(subtitles, None)
@@ -35,7 +35,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
self.DL.params['writesubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260')
- def test_youtube_subtitles_it(self):
+ def test_youtube_subtitles_lang(self):
self.DL.params['writesubtitles'] = True
self.DL.params['subtitleslang'] = 'it'
subtitles = self.getSubtitles()
@@ -64,6 +64,12 @@ class TestYoutubeSubtitles(unittest.TestCase):
self.DL.params['subtitleslang'] = 'it'
subtitles = self.getSubtitles()
self.assertTrue(subtitles['it'] is not None)
+ def test_youtube_nosubtitles(self):
+ self.url = 'sAjKT8FhjI8'
+ self.DL.params['allsubtitles'] = True
+ subtitles = self.getSubtitles()
+ self.assertEqual(len(subtitles), 0)
+
if __name__ == '__main__':
unittest.main()
From b3f0e5304807862ce72c136da90b860df805ee5c Mon Sep 17 00:00:00 2001
From: Jai Grimshaw
Date: Sat, 31 Aug 2013 01:53:01 +1000
Subject: [PATCH 012/215] Fixed issue #1277 KeyError when no description.
Allows a continue with a warning when an extractor cannot retrieve a description.
---
youtube_dl/YoutubeDL.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index b289bd9e2..afce28040 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -492,6 +492,8 @@ class YoutubeDL(object):
self.report_writedescription(descfn)
with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
descfile.write(info_dict['description'])
+ except (KeyError, TypeError):
+ self.report_warning(u'Cannot extract description.')
except (OSError, IOError):
self.report_error(u'Cannot write description file ' + descfn)
return
From bdc6b3fc64a03045b8130cdc824ee3f6c15eeff1 Mon Sep 17 00:00:00 2001
From: Allan Zhou
Date: Fri, 30 Aug 2013 17:51:50 -0700
Subject: [PATCH 013/215] add support for "-f mp4" for YouTube
---
youtube_dl/__init__.py | 2 +-
youtube_dl/extractor/youtube.py | 37 ++++++++++++++++++++++++++-------
2 files changed, 31 insertions(+), 8 deletions(-)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 431460c57..b6b12683f 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -192,7 +192,7 @@ def parseOpts(overrideArguments=None):
video_format.add_option('-f', '--format',
action='store', dest='format', metavar='FORMAT',
- help='video format code, specifiy the order of preference using slashes: "-f 22/17/18"')
+ help='video format code, specifiy the order of preference using slashes: "-f 22/17/18". "-f mp4" and "-f flv" are also supported')
video_format.add_option('--all-formats',
action='store_const', dest='format', help='download all available video formats', const='all')
video_format.add_option('--prefer-free-formats',
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 8e486afd0..bd2b986ce 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -153,8 +153,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
$"""
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
# Listed in order of quality
- _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13',
- '95', '94', '93', '92', '132', '151',
+ _available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
+ # AHLS
+ '96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '84', '102', '83', '101', '82', '100',
# Dash video
@@ -163,8 +164,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Dash audio
'141', '172', '140', '171', '139',
]
- _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13',
- '95', '94', '93', '92', '132', '151',
+ _available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
+ # AHLS
+ '96', '95', '94', '93', '92', '132', '151',
+ # 3D
'85', '102', '84', '101', '83', '100', '82',
# Dash video
'138', '248', '137', '247', '136', '246', '245',
@@ -172,11 +175,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Dash audio
'172', '141', '171', '140', '139',
]
+ _video_formats_map = {
+ 'flv': ['35', '34', '6', '5'],
+ '3gp': ['36', '17', '13'],
+ 'mp4': ['38', '37', '22', '18'],
+ 'webm': ['46', '45', '44', '43'],
+ }
_video_extensions = {
'13': '3gp',
- '17': 'mp4',
+ '17': '3gp',
'18': 'mp4',
'22': 'mp4',
+ '36': '3gp',
'37': 'mp4',
'38': 'mp4',
'43': 'webm',
@@ -193,7 +203,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'101': 'webm',
'102': 'webm',
- # videos that use m3u8
+ # AHLS
'92': 'mp4',
'93': 'mp4',
'94': 'mp4',
@@ -234,6 +244,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'22': '720x1280',
'34': '360x640',
'35': '480x854',
+ '36': '240x320',
'37': '1080x1920',
'38': '3072x4096',
'43': '360x640',
@@ -597,13 +608,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
else:
# Specific formats. We pick the first in a slash-delimeted sequence.
- # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+ # Format can be specified as itag or 'mp4' or 'flv' etc. We pick the highest quality
+ # available in the specified format. For example,
+ # if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
+ # if '1/mp4/3/4' is requested and '1' and '5' (is a mp4) are available, we pick '1'.
+ # if '1/mp4/3/4' is requested and '4' and '5' (is a mp4) are available, we pick '5'.
req_formats = req_format.split('/')
video_url_list = None
for rf in req_formats:
if rf in url_map:
video_url_list = [(rf, url_map[rf])]
break
+ if rf in self._video_formats_map:
+ for srf in self._video_formats_map[rf]:
+ if srf in url_map:
+ video_url_list = [(srf, url_map[srf])]
+ break
+ else:
+ continue
+ break
if video_url_list is None:
raise ExtractorError(u'requested format not available')
return video_url_list
From 691008087b902fa731a8f4f840c1821c93505840 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 31 Aug 2013 15:05:59 +0200
Subject: [PATCH 014/215] Add an automatic page generator for the supported
sites (related #156)
They are listed in the "supportedsites.html" page.
---
devscripts/gh-pages/update-sites.py | 33 +++++++++++++++++++++++++++++
devscripts/release.sh | 1 +
2 files changed, 34 insertions(+)
create mode 100755 devscripts/gh-pages/update-sites.py
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
new file mode 100755
index 000000000..fa4bb2beb
--- /dev/null
+++ b/devscripts/gh-pages/update-sites.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import textwrap
+
+# We must be able to import youtube_dl
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+
+import youtube_dl
+
+def main():
+ with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
+ template = tmplf.read()
+
+ ie_htmls = []
+ for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
+ ie_html = '{}'.format(ie.IE_NAME)
+ try:
+ ie_html += ': {}'.format(ie.IE_DESC)
+ except AttributeError:
+ pass
+ if ie.working() == False:
+ ie_html += ' (Currently broken)'
+ ie_htmls.append('{}'.format(ie_html))
+
+ template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))
+
+ with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
+ sitesf.write(template)
+
+if __name__ == '__main__':
+ main()
diff --git a/devscripts/release.sh b/devscripts/release.sh
index 24c9ad8d8..62c68a6cf 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -85,6 +85,7 @@ ROOT=$(pwd)
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
"$ROOT/devscripts/gh-pages/generate-download.py"
"$ROOT/devscripts/gh-pages/update-copyright.py"
+ "$ROOT/devscripts/gh-pages/update-sites.py"
git add *.html *.html.in update
git commit -m "release $version"
git show HEAD
From 6c758d79de48956b90d9e78aec695ee0b10b00d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 31 Aug 2013 22:35:39 +0200
Subject: [PATCH 015/215] [metacafe] Add more cases for detecting the uploader
detection (reported in #1343)
---
youtube_dl/extractor/metacafe.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py
index e38dc98b4..e537648ff 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dl/extractor/metacafe.py
@@ -122,7 +122,7 @@ class MetacafeIE(InfoExtractor):
video_title = self._html_search_regex(r'(?im)(.*) - Video', webpage, u'title')
description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
- r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);',
+ r'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);',
webpage, u'uploader nickname', fatal=False)
return {
From 8e4e89f1c236e1bec38c5363c1c341930056211e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 2 Sep 2013 11:54:09 +0200
Subject: [PATCH 016/215] Add an extractor for VeeHD (closes #1359)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/veehd.py | 56 ++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+)
create mode 100644 youtube_dl/extractor/veehd.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 90f1a4418..9f56e427c 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -89,6 +89,7 @@ from .tutv import TutvIE
from .unistra import UnistraIE
from .ustream import UstreamIE
from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
from .videofyme import VideofyMeIE
diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py
new file mode 100644
index 000000000..3a99a29c6
--- /dev/null
+++ b/youtube_dl/extractor/veehd.py
@@ -0,0 +1,56 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urlparse,
+ get_element_by_id,
+ clean_html,
+)
+
+class VeeHDIE(InfoExtractor):
+ _VALID_URL = r'https?://veehd.com/video/(?P\d+)'
+
+ _TEST = {
+ u'url': u'http://veehd.com/video/4686958',
+ u'file': u'4686958.mp4',
+ u'info_dict': {
+ u'title': u'Time Lapse View from Space ( ISS)',
+ u'uploader_id': u'spotted',
+ u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, video_id)
+ player_path = self._search_regex(r'\$\("#playeriframe"\).attr\({src : "(.+?)"',
+ webpage, u'player path')
+ player_url = compat_urlparse.urljoin(url, player_path)
+ player_page = self._download_webpage(player_url, video_id,
+ u'Downloading player page')
+ config_json = self._search_regex(r'value=\'config=({.+?})\'',
+ player_page, u'config json')
+ config = json.loads(config_json)
+
+ video_url = compat_urlparse.unquote(config['clip']['url'])
+ title = clean_html(get_element_by_id('videoName', webpage).rpartition('|')[0])
+ uploader_id = self._html_search_regex(r'(.+?)',
+ webpage, u'uploader')
+ thumbnail = self._search_regex(r'
(.*?)
Date: Tue, 3 Sep 2013 01:51:17 +0200
Subject: [PATCH 017/215] Extractor for defense.gouv.fr
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/defense.py | 37 ++++++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
create mode 100644 youtube_dl/extractor/defense.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 9f56e427c..a96b62d37 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -21,6 +21,7 @@ from .dailymotion import DailymotionIE, DailymotionPlaylistIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
+from .defense import DefenseGouvFrIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .escapist import EscapistIE
diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py
new file mode 100644
index 000000000..963fb897f
--- /dev/null
+++ b/youtube_dl/extractor/defense.py
@@ -0,0 +1,37 @@
+# coding: utf-8
+'''Extractor for defense.gouv.fr'''
+import re
+import json
+
+from .common import InfoExtractor
+
+
+class DefenseGouvFrIE(InfoExtractor):
+ '''Extractor for defense.gouv.fr'''
+ _IE_NAME = 'defense.gouv.fr'
+ _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
+ 'ligthboxvideo/base-de-medias/webtv/(.*)')
+
+ _TEST = {
+ u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/',
+ 'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1')
+ }
+
+ def _real_extract(self, url):
+ title = re.match(self._VALID_URL, url).group(1)
+ webpage = self._download_webpage(url, title)
+ video_id = self._search_regex(
+ r"flashvars.pvg_id=\"(\d+)\";",
+ webpage, 'ID')
+
+ json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
+ + video_id)
+ info = self._download_webpage(json_url, title,
+ 'Downloading JSON config')
+ video_url = json.loads(info)['renditions'][0]['url']
+
+ return {'id': video_id,
+ 'ext': 'mp4',
+ 'url': video_url,
+ 'title': title,
+ }
From aa32314d09cf0ab3fad1efc2c5657e6704a7e47b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 3 Sep 2013 10:48:56 +0200
Subject: [PATCH 018/215] [vimeo] add support for videos that embed the
download url in the player page (fixes #1364)
---
youtube_dl/extractor/vimeo.py | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 512e06e2a..dee4175ef 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -44,6 +44,16 @@ class VimeoIE(InfoExtractor):
u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
},
},
+ {
+ u'url': u'http://player.vimeo.com/video/54469442',
+ u'file': u'54469442.mp4',
+ u'md5': u'619b811a4417aa4abe78dc653becf511',
+ u'note': u'Videos that embed the url in the player page',
+ u'info_dict': {
+ u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software',
+ u'uploader': u'The BLN & Business of Software',
+ },
+ },
]
def _login(self):
@@ -112,7 +122,8 @@ class VimeoIE(InfoExtractor):
# Extract the config JSON
try:
- config = webpage.split(' = {config:')[1].split(',assets:')[0]
+ config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'],
+ webpage, u'info section', flags=re.DOTALL)
config = json.loads(config)
except:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@@ -132,7 +143,9 @@ class VimeoIE(InfoExtractor):
video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
# Extract video thumbnail
- video_thumbnail = config["video"]["thumbnail"]
+ video_thumbnail = config["video"].get("thumbnail")
+ if video_thumbnail is None:
+ _, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
# Extract video description
video_description = get_element_by_attribute("itemprop", "description", webpage)
@@ -154,14 +167,15 @@ class VimeoIE(InfoExtractor):
# TODO bind to format param
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
files = { 'hd': [], 'sd': [], 'other': []}
+ config_files = config["video"].get("files") or config["request"].get("files")
for codec_name, codec_extension in codecs:
- if codec_name in config["video"]["files"]:
- if 'hd' in config["video"]["files"][codec_name]:
+ if codec_name in config_files:
+ if 'hd' in config_files[codec_name]:
files['hd'].append((codec_name, codec_extension, 'hd'))
- elif 'sd' in config["video"]["files"][codec_name]:
+ elif 'sd' in config_files[codec_name]:
files['sd'].append((codec_name, codec_extension, 'sd'))
else:
- files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+ files['other'].append((codec_name, codec_extension, config_files[codec_name][0]))
for quality in ('hd', 'sd', 'other'):
if len(files[quality]) > 0:
@@ -173,8 +187,12 @@ class VimeoIE(InfoExtractor):
else:
raise ExtractorError(u'No known codec found')
- video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
- %(video_id, sig, timestamp, video_quality, video_codec.upper())
+ video_url = None
+ if isinstance(config_files[video_codec], dict):
+ video_url = config_files[video_codec][video_quality].get("url")
+ if video_url is None:
+ video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
+ %(video_id, sig, timestamp, video_quality, video_codec.upper())
return [{
'id': video_id,
From 9c2ade40de53bae865c5267642651c81d16e48a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 3 Sep 2013 11:11:36 +0200
Subject: [PATCH 019/215] [vimeo] Handle AssertionError when trying to get
the description
In some pages the html tags are not closed, python 2.6 cannot handle it.
---
youtube_dl/extractor/vimeo.py | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index dee4175ef..4a7d82b7a 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -148,9 +148,17 @@ class VimeoIE(InfoExtractor):
_, video_thumbnail = sorted((int(width), t_url) for (width, t_url) in config["video"]["thumbs"].items())[-1]
# Extract video description
- video_description = get_element_by_attribute("itemprop", "description", webpage)
- if video_description: video_description = clean_html(video_description)
- else: video_description = u''
+ video_description = None
+ try:
+ video_description = get_element_by_attribute("itemprop", "description", webpage)
+ if video_description: video_description = clean_html(video_description)
+ except AssertionError as err:
+ # On some pages like (http://player.vimeo.com/video/54469442) the
+ # html tags are not closed, python 2.6 cannot handle it
+ if err.args[0] == 'we should not get here!':
+ pass
+ else:
+ raise
# Extract upload date
video_upload_date = None
From 4ff7a0f1f6e6b1ad1743330d318dfe85806923b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 3 Sep 2013 11:33:59 +0200
Subject: [PATCH 020/215] [dailymotion] improve the regex for extracting the
video info
---
youtube_dl/extractor/dailymotion.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 1ea449ca8..439033d23 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -55,7 +55,8 @@ class DailymotionIE(InfoExtractor):
embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
embed_page = self._download_webpage(embed_url, video_id,
u'Downloading embed page')
- info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
+ info = self._search_regex(r'var info = ({.*?}),$', embed_page,
+ 'video info', flags=re.MULTILINE)
info = json.loads(info)
# TODO: support choosing qualities
From c8dbccde30d9ca06d4c9305329a9aacd10420276 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 3 Sep 2013 11:51:01 +0200
Subject: [PATCH 021/215] [orf] Remove the test video, they seem to expire in
one week
---
youtube_dl/extractor/orf.py | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
index 41ef8e992..cfca2a063 100644
--- a/youtube_dl/extractor/orf.py
+++ b/youtube_dl/extractor/orf.py
@@ -14,19 +14,6 @@ from ..utils import (
class ORFIE(InfoExtractor):
_VALID_URL = r'https?://tvthek.orf.at/(programs/.+?/episodes|topics/.+?)/(?P\d+)'
- _TEST = {
- u'url': u'http://tvthek.orf.at/programs/1171769-Wetter-ZIB/episodes/6557323-Wetter',
- u'file': u'6566957.flv',
- u'info_dict': {
- u'title': u'Wetter',
- u'description': u'Christa Kummer, Marcus Wadsak und Kollegen präsentieren abwechselnd ihre täglichen Wetterprognosen für Österreich.\r \r Mehr Wetter unter wetter.ORF.at',
- },
- u'params': {
- # It uses rtmp
- u'skip_download': True,
- }
- }
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
playlist_id = mobj.group('id')
From 025171c47641a47cc2a4e4ed52c7a04b465e0e5d Mon Sep 17 00:00:00 2001
From: Pierre Rudloff
Date: Tue, 3 Sep 2013 12:03:19 +0200
Subject: [PATCH 022/215] Suggested by @phihag
---
youtube_dl/extractor/defense.py | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py
index 963fb897f..424d960da 100644
--- a/youtube_dl/extractor/defense.py
+++ b/youtube_dl/extractor/defense.py
@@ -1,5 +1,3 @@
-# coding: utf-8
-'''Extractor for defense.gouv.fr'''
import re
import json
@@ -7,14 +5,18 @@ from .common import InfoExtractor
class DefenseGouvFrIE(InfoExtractor):
- '''Extractor for defense.gouv.fr'''
_IE_NAME = 'defense.gouv.fr'
_VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
- 'ligthboxvideo/base-de-medias/webtv/(.*)')
+ r'ligthboxvideo/base-de-medias/webtv/(.*)')
_TEST = {
- u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/',
- 'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1')
+ u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
+ u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
+ u'file': u'11213.mp4',
+ u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
+ "info_dict": {
+ "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
+ }
}
def _real_extract(self, url):
From 96fb5605b29c5029ab2894b5722c0937e320a3c0 Mon Sep 17 00:00:00 2001
From: Allan Zhou
Date: Tue, 3 Sep 2013 18:49:35 -0700
Subject: [PATCH 023/215] AHLS -> Apple HTTP Live Streaming
---
youtube_dl/extractor/youtube.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index d331aa01b..01265ca28 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -154,7 +154,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
# Listed in order of quality
_available_formats = ['38', '37', '46', '22', '45', '35', '44', '34', '18', '43', '6', '5', '36', '17', '13',
- # AHLS
+ # Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '84', '102', '83', '101', '82', '100',
@@ -165,7 +165,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'141', '172', '140', '171', '139',
]
_available_formats_prefer_free = ['38', '46', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '36', '17', '13',
- # AHLS
+ # Apple HTTP Live Streaming
'96', '95', '94', '93', '92', '132', '151',
# 3D
'85', '102', '84', '101', '83', '100', '82',
@@ -203,7 +203,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'101': 'webm',
'102': 'webm',
- # AHLS
+ # Apple HTTP Live Streaming
'92': 'mp4',
'93': 'mp4',
'94': 'mp4',
From 08523ee20a57e7ac28d895165f3b759b311e8495 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Wed, 4 Sep 2013 14:33:32 +0200
Subject: [PATCH 024/215] release 2013.09.04
---
README.md | 3 ++-
youtube_dl/version.py | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 75068fe56..2776cb3eb 100644
--- a/README.md
+++ b/README.md
@@ -113,7 +113,8 @@ which means you can modify it, redistribute it or use it however you like.
## Video Format Options:
-f, --format FORMAT video format code, specifiy the order of
- preference using slashes: "-f 22/17/18"
+ preference using slashes: "-f 22/17/18". "-f mp4"
+ and "-f flv" are also supported
--all-formats download all available video formats
--prefer-free-formats prefer free video formats unless a specific one
is requested
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index b6284c6d6..5d7467699 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.08.30'
+__version__ = '2013.09.04'
From 150f20828be552763dddce1c45b9a4e642cff599 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 4 Sep 2013 22:06:50 +0200
Subject: [PATCH 025/215] Add extractor for daum.net (closes #1330)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/daum.py | 71 ++++++++++++++++++++++++++++++++
2 files changed, 72 insertions(+)
create mode 100644 youtube_dl/extractor/daum.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a96b62d37..caef53b73 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -18,6 +18,7 @@ from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
from .dailymotion import DailymotionIE, DailymotionPlaylistIE
+from .daum import DaumIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
new file mode 100644
index 000000000..9b4566999
--- /dev/null
+++ b/youtube_dl/extractor/daum.py
@@ -0,0 +1,71 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+ determine_ext,
+)
+
+
+class DaumIE(InfoExtractor):
+ _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P\d+)'
+ IE_NAME = u'daum.net'
+
+ _TEST = {
+ u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+ u'file': u'52554690.mp4',
+ u'info_dict': {
+ u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
+ u'upload_date': u'20130831',
+ u'duration': 3868,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group(1)
+ webpage = self._download_webpage(url, video_id)
+ full_id = self._search_regex(r'
Date: Wed, 4 Sep 2013 22:09:22 +0200
Subject: [PATCH 026/215] Credit @Huarong for tv.sohu.com
---
youtube_dl/__init__.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index b6b12683f..4213ec1d5 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -28,6 +28,7 @@ __authors__ = (
'Axel Noack',
'Albert Kim',
'Pierre Rudloff',
+ 'Huarong Huo',
)
__license__ = 'Public Domain'
From 9363169b67a7837bdd157939a896bd38b350f634 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 5 Sep 2013 10:08:17 +0200
Subject: [PATCH 027/215] [daum] Get the video page from a canonical url to
extract the full id (fixes #1373) and extract description.
---
youtube_dl/extractor/daum.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
index 9b4566999..a804e83bd 100644
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -18,6 +18,7 @@ class DaumIE(InfoExtractor):
u'file': u'52554690.mp4',
u'info_dict': {
u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
+ u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
u'upload_date': u'20130831',
u'duration': 3868,
},
@@ -26,7 +27,8 @@ class DaumIE(InfoExtractor):
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group(1)
- webpage = self._download_webpage(url, video_id)
+ canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
+ webpage = self._download_webpage(canonical_url, video_id)
full_id = self._search_regex(r'
Date: Thu, 5 Sep 2013 10:53:40 +0200
Subject: [PATCH 028/215] Add extractor for tvcast.naver.com (closes #1331)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/naver.py | 73 ++++++++++++++++++++++++++++++++
2 files changed, 74 insertions(+)
create mode 100644 youtube_dl/extractor/naver.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index caef53b73..70ebd29e2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -58,6 +58,7 @@ from .mtv import MTVIE
from .muzu import MuzuTVIE
from .myspass import MySpassIE
from .myvideo import MyVideoIE
+from .naver import NaverIE
from .nba import NBAIE
from .nbc import NBCNewsIE
from .ooyala import OoyalaIE
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
new file mode 100644
index 000000000..9df236d69
--- /dev/null
+++ b/youtube_dl/extractor/naver.py
@@ -0,0 +1,73 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urllib_parse,
+ ExtractorError,
+)
+
+
+class NaverIE(InfoExtractor):
+ _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P\d+)'
+
+ _TEST = {
+ u'url': u'http://tvcast.naver.com/v/81652',
+ u'file': u'81652.mp4',
+ u'info_dict': {
+ u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+ u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+ u'upload_date': u'20130903',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group(1)
+ webpage = self._download_webpage(url, video_id)
+ m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
+ webpage)
+ if m_id is None:
+ raise ExtractorError(u'couldn\'t extract vid and key')
+ vid = m_id.group(1)
+ key = m_id.group(2)
+ query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,})
+ query_urls = compat_urllib_parse.urlencode({
+ 'masterVid': vid,
+ 'protocol': 'p2p',
+ 'inKey': key,
+ })
+ info_xml = self._download_webpage(
+ 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
+ video_id, u'Downloading video info')
+ urls_xml = self._download_webpage(
+ 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
+ video_id, u'Downloading video formats info')
+ info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+ urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
+
+ formats = []
+ for format_el in urls.findall('EncodingOptions/EncodingOption'):
+ domain = format_el.find('Domain').text
+ if domain.startswith('rtmp'):
+ continue
+ formats.append({
+ 'url': domain + format_el.find('uri').text,
+ 'ext': 'mp4',
+ 'width': int(format_el.find('width').text),
+ 'height': int(format_el.find('height').text),
+ })
+
+ info = {
+ 'id': video_id,
+ 'title': info.find('Subject').text,
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'upload_date': info.find('WriteDate').text.replace('.', ''),
+ 'view_count': int(info.find('PlayCount').text),
+ }
+ # TODO: Remove when #980 has been merged
+ info.update(formats[-1])
+ return info
From 08e291b54d8aaa34300c02e70ff86aaa36820a62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 5 Sep 2013 18:02:17 +0200
Subject: [PATCH 029/215] [generic] Recognize html5 video in the format '
',
+ webpage, u'description', flags=re.DOTALL)
+
+ info = {
+ 'id': video_id,
+ 'title': clip.find('title').text,
+ 'formats': formats,
+ 'description': description,
+ 'duration': int(clip.find('duration').text),
+ }
+ # TODO: Remove when #980 has been merged
+ info.update(formats[-1])
+ return info
From a490fda7464a3cb9d7b5938305241740bae69efb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 6 Sep 2013 18:36:07 +0200
Subject: [PATCH 044/215] [dailymotion] accept embed urls (fixes #1386)
---
youtube_dl/extractor/dailymotion.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 439033d23..3c616e089 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -14,7 +14,7 @@ from ..utils import (
class DailymotionIE(InfoExtractor):
"""Information Extractor for Dailymotion"""
- _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)'
+ _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
IE_NAME = u'dailymotion'
_TEST = {
u'url': u'http://www.dailymotion.com/video/x33vw9_tutoriel-de-youtubeur-dl-des-video_tech',
@@ -33,6 +33,7 @@ class DailymotionIE(InfoExtractor):
video_id = mobj.group(1).split('_')[0].split('?')[0]
video_extension = 'mp4'
+ url = 'http://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url)
From a7130543fa0368175740f5fa173ef920671db866 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 6 Sep 2013 18:39:35 +0200
Subject: [PATCH 045/215] [generic] If the url doesn't specify the protocol,
then try to extract it after prepending 'http://'
---
youtube_dl/extractor/generic.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index de7379a92..f92e61fea 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -109,6 +109,11 @@ class GenericIE(InfoExtractor):
return new_url
def _real_extract(self, url):
+ parsed_url = compat_urlparse.urlparse(url)
+ if not parsed_url.scheme:
+ self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
+ return self.url_result('http://' + url)
+
try:
new_url = self._test_redirect(url)
if new_url:
From 8f362589a55f5e9fe0bf119179b37b365f6d9459 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 7 Sep 2013 22:29:15 +0200
Subject: [PATCH 046/215] release 2013.09.07
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 2f31cdd87..1bd053ab4 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.06.1'
+__version__ = '2013.09.07'
From 890f62e86805c9eed8444450a209f894bbd74e7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sun, 8 Sep 2013 18:49:10 +0200
Subject: [PATCH 047/215] Revert "[youtube] Fix detection of tags from HLS
videos."
They have undone the change
This reverts commit 0638ad9999e7c374b253d0e13f4e3a20ef0b1171.
---
youtube_dl/extractor/youtube.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 423a5e973..bad15cb44 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -643,7 +643,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
manifest = self._download_webpage(manifest_url, video_id, u'Downloading formats manifest')
formats_urls = _get_urls(manifest)
for format_url in formats_urls:
- itag = self._search_regex(r'itag%3D(\d+?)/', format_url, 'itag')
+ itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
url_map[itag] = format_url
return url_map
From 8963d9c2661b7de8832b7afcf1cdbc197275d8e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 9 Sep 2013 10:33:12 +0200
Subject: [PATCH 048/215] [youtube] Modify the regex to match ids of length 11
(fixes #1396)
In urls like http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930 you can't split the query string and ids always have that length.
---
test/test_all_urls.py | 11 +++++++----
youtube_dl/extractor/youtube.py | 2 +-
2 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 5d8d93e0e..99fc7bd28 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -72,10 +72,13 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertTrue(JustinTVIE.suitable(u"http://www.twitch.tv/tsm_theoddone/c/2349361"))
def test_youtube_extract(self):
- self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
- self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc')
- self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc')
- self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc')
+ assertExtractId = lambda url, id: self.assertEqual(YoutubeIE()._extract_id(url), id)
+ assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc')
+ assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc')
+ assertExtractId('BaW_jenozKc', 'BaW_jenozKc')
def test_no_duplicates(self):
ies = gen_extractors()
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index bad15cb44..6a8351293 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -150,7 +150,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|youtu\.be/ # just youtu.be/xxxx
)
)? # all until now is optional -> you can pass the naked ID
- ([0-9A-Za-z_-]+) # here is it! the YouTube video ID
+ ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
(?(1).+)? # if we found the ID, everything can follow
$"""
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
From 061b2889a9a5a13c6c180932ea742975cdb02948 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 9 Sep 2013 10:38:54 +0200
Subject: [PATCH 049/215] Fix the minutes part in FileDownloader.format_seconds
(fixes #1397)
It printed for the minutes the result of (seconds // 60)
---
youtube_dl/FileDownloader.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 7c5ac4bc2..0b5a5d77d 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -66,7 +66,7 @@ class FileDownloader(object):
@staticmethod
def format_seconds(seconds):
(mins, secs) = divmod(seconds, 60)
- (hours, eta_mins) = divmod(mins, 60)
+ (hours, mins) = divmod(mins, 60)
if hours > 99:
return '--:--:--'
if hours == 0:
From 6d2d21f713614141cba09310cb60d2edd76c79ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 9 Sep 2013 19:56:16 +0200
Subject: [PATCH 050/215] [sohu] add support for my.tv.sohu.com urls (fixes
#1398)
---
youtube_dl/extractor/sohu.py | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
index 77bb0a8dc..2b9bf0cb7 100644
--- a/youtube_dl/extractor/sohu.py
+++ b/youtube_dl/extractor/sohu.py
@@ -8,7 +8,7 @@ from ..utils import ExtractorError
class SohuIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P\d+)\.shtml.*?'
+ _VALID_URL = r'https?://(?Pmy\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P\d+)\.shtml.*?'
_TEST = {
u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super',
@@ -21,8 +21,11 @@ class SohuIE(InfoExtractor):
def _real_extract(self, url):
- def _fetch_data(vid_id):
- base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+ def _fetch_data(vid_id, mytv=False):
+ if mytv:
+ base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
+ else:
+ base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid='
data_url = base_data_url + str(vid_id)
data_json = self._download_webpage(
data_url, video_id,
@@ -31,15 +34,16 @@ class SohuIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ mytv = mobj.group('mytv') is not None
webpage = self._download_webpage(url, video_id)
raw_title = self._html_search_regex(r'(?s)(.+?)',
webpage, u'video title')
title = raw_title.partition('-')[0].strip()
- vid = self._html_search_regex(r'var vid="(\d+)"', webpage,
+ vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage,
u'video path')
- data = _fetch_data(vid)
+ data = _fetch_data(vid, mytv)
QUALITIES = ('ori', 'super', 'high', 'nor')
vid_ids = [data['data'][q + 'Vid']
@@ -51,7 +55,7 @@ class SohuIE(InfoExtractor):
# For now, we just pick the highest available quality
vid_id = vid_ids[-1]
- format_data = data if vid == vid_id else _fetch_data(vid_id)
+ format_data = data if vid == vid_id else _fetch_data(vid_id, mytv)
part_count = format_data['data']['totalBlocks']
allot = format_data['allot']
prot = format_data['prot']
From 07463ea16249965fa4033e1383a8f51866ee851c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 10 Sep 2013 11:19:58 +0200
Subject: [PATCH 051/215] Add an extractor for Slideshare (closes #1400)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/slideshare.py | 47 ++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+)
create mode 100644 youtube_dl/extractor/slideshare.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index fbe0b8cb7..bedb208fb 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -75,6 +75,7 @@ from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE
from .sina import SinaIE
from .slashdot import SlashdotIE
+from .slideshare import SlideshareIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE
from .spiegel import SpiegelIE
diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py
new file mode 100644
index 000000000..afc3001b5
--- /dev/null
+++ b/youtube_dl/extractor/slideshare.py
@@ -0,0 +1,47 @@
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urlparse,
+ ExtractorError,
+)
+
+
+class SlideshareIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P.+?)($|\?)'
+
+ _TEST = {
+ u'url': u'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity',
+ u'file': u'25665706.mp4',
+ u'info_dict': {
+ u'title': u'Managing Scale and Complexity',
+ u'description': u'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ page_title = mobj.group('title')
+ webpage = self._download_webpage(url, page_title)
+ slideshare_obj = self._search_regex(
+ r'var slideshare_object = ({.*?}); var user_info =',
+ webpage, u'slideshare object')
+ info = json.loads(slideshare_obj)
+ if info['slideshow']['type'] != u'video':
+ raise ExtractorError(u'Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True)
+
+ doc = info['doc']
+ bucket = info['jsplayer']['video_bucket']
+ ext = info['jsplayer']['video_extension']
+ video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' + ext)
+
+ return {
+ '_type': 'video',
+ 'id': info['slideshow']['id'],
+ 'title': info['slideshow']['title'],
+ 'ext': ext,
+ 'url': video_url,
+ 'thumbnail': info['slideshow']['pin_image_url'],
+ 'description': self._og_search_description(webpage),
+ }
From a1ab5538587c92b9f6d8299aea52ecf694553123 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 10 Sep 2013 11:25:11 +0200
Subject: [PATCH 052/215] release 2013.09.10
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 1bd053ab4..aba9520c5 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.07'
+__version__ = '2013.09.10'
From 5d8afe69f7be840c6494333e84c46f0a1ca93147 Mon Sep 17 00:00:00 2001
From: Pierre Rudloff
Date: Sun, 8 Sep 2013 21:55:11 +0200
Subject: [PATCH 053/215] Add an extractor for pluzz.francetv.fr (closes PR
#1399)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/francetv.py | 50 ++++++++++++++++++++++++++++++++
2 files changed, 51 insertions(+)
create mode 100644 youtube_dl/extractor/francetv.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index bedb208fb..0d80acd22 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -29,6 +29,7 @@ from .escapist import EscapistIE
from .exfm import ExfmIE
from .facebook import FacebookIE
from .flickr import FlickrIE
+from .francetv import PluzzIE
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
from .gamespot import GameSpotIE
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
new file mode 100644
index 000000000..6e1176470
--- /dev/null
+++ b/youtube_dl/extractor/francetv.py
@@ -0,0 +1,50 @@
+# encoding: utf-8
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+ compat_urlparse,
+)
+
+
+class PluzzIE(InfoExtractor):
+ IE_NAME = u'pluzz.francetv.fr'
+ _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
+
+ _TEST = {
+ u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
+ u'file': u'88439064.mp4',
+ u'info_dict': {
+ u'title': u'Allô Rufo',
+ u'description': u'md5:d909f1ebdf963814b65772aea250400e',
+ },
+ u'params': {
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ title = re.match(self._VALID_URL, url).group(1)
+ webpage = self._download_webpage(url, title)
+ video_id = self._search_regex(
+ r'data-diffusion="(\d+)"', webpage, 'ID')
+
+ xml_desc = self._download_webpage(
+ 'http://www.pluzz.fr/appftv/webservices/video/'
+ 'getInfosOeuvre.php?id-diffusion='
+ + video_id, title, 'Downloading XML config')
+ info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
+
+ manifest_url = info.find('videos/video/url').text
+ video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
+ video_url = video_url.replace('/z/', '/i/')
+ thumbnail_path = info.find('image').text
+
+ return {'id': video_id,
+ 'ext': 'mp4',
+ 'url': video_url,
+ 'title': info.find('titre').text,
+ 'thumbnail': compat_urlparse.urljoin(url, thumbnail_path),
+ 'description': info.find('synopsis').text,
+ }
From 6b361ad5eed2c70a36ab5e6a013be9496701d62a Mon Sep 17 00:00:00 2001
From: Pierre Rudloff
Date: Tue, 10 Sep 2013 12:13:22 +0200
Subject: [PATCH 054/215] Wrong property name
---
youtube_dl/extractor/canalc2.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py
index 50832217a..e7f4fa9fd 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dl/extractor/canalc2.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
class Canalc2IE(InfoExtractor):
- _IE_NAME = 'canalc2.tv'
+ IE_NAME = 'canalc2.tv'
_VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
_TEST = {
From 648d25d43d86afe853f10dff25029626c7863502 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 10 Sep 2013 15:50:34 +0200
Subject: [PATCH 055/215] [francetv] Add an extractor for francetvinfo.fr
(closes #1317)
It uses the same system as Pluzz, create a base class for both extractors.
---
youtube_dl/extractor/__init__.py | 5 ++-
youtube_dl/extractor/francetv.py | 61 +++++++++++++++++++++++---------
2 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 0d80acd22..26cf24935 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -29,7 +29,10 @@ from .escapist import EscapistIE
from .exfm import ExfmIE
from .facebook import FacebookIE
from .flickr import FlickrIE
-from .francetv import PluzzIE
+from .francetv import (
+ PluzzIE,
+ FranceTvInfoIE,
+)
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
from .gamespot import GameSpotIE
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 6e1176470..f2b12c884 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -8,7 +8,29 @@ from ..utils import (
)
-class PluzzIE(InfoExtractor):
+class FranceTVBaseInfoExtractor(InfoExtractor):
+ def _extract_video(self, video_id):
+ xml_desc = self._download_webpage(
+ 'http://www.francetvinfo.fr/appftv/webservices/video/'
+ 'getInfosOeuvre.php?id-diffusion='
+ + video_id, video_id, 'Downloading XML config')
+ info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
+
+ manifest_url = info.find('videos/video/url').text
+ video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
+ video_url = video_url.replace('/z/', '/i/')
+ thumbnail_path = info.find('image').text
+
+ return {'id': video_id,
+ 'ext': 'mp4',
+ 'url': video_url,
+ 'title': info.find('titre').text,
+ 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', thumbnail_path),
+ 'description': info.find('synopsis').text,
+ }
+
+
+class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = u'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
@@ -29,22 +51,27 @@ class PluzzIE(InfoExtractor):
webpage = self._download_webpage(url, title)
video_id = self._search_regex(
r'data-diffusion="(\d+)"', webpage, 'ID')
+ return self._extract_video(video_id)
- xml_desc = self._download_webpage(
- 'http://www.pluzz.fr/appftv/webservices/video/'
- 'getInfosOeuvre.php?id-diffusion='
- + video_id, title, 'Downloading XML config')
- info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
- manifest_url = info.find('videos/video/url').text
- video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
- video_url = video_url.replace('/z/', '/i/')
- thumbnail_path = info.find('image').text
+class FranceTvInfoIE(FranceTVBaseInfoExtractor):
+ IE_NAME = u'francetvinfo.fr'
+ _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P.+).html'
- return {'id': video_id,
- 'ext': 'mp4',
- 'url': video_url,
- 'title': info.find('titre').text,
- 'thumbnail': compat_urlparse.urljoin(url, thumbnail_path),
- 'description': info.find('synopsis').text,
- }
+ _TEST = {
+ u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
+ u'file': u'84981923.mp4',
+ u'info_dict': {
+ u'title': u'Soir 3',
+ },
+ u'params': {
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ page_title = mobj.group('title')
+ webpage = self._download_webpage(url, page_title)
+ video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
+ return self._extract_video(video_id)
From aa8f2641daed692bfa996e9ab3235643b2fb7aab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 11:24:58 +0200
Subject: [PATCH 056/215] [youtube] update algo for length 85 (fixes #1408 and
fixes #1406)
---
devscripts/youtube_genalgo.py | 4 ++--
youtube_dl/extractor/youtube.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index 3d1f83a93..6e3595366 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -23,9 +23,9 @@ tests = [
# 86 - vfluy6kdb 2013/09/06
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
- # 85
+ # 85 - vflkuzxcs 2013/09/11
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
- ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
+ "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"),
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6a8351293..0e828263c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -438,7 +438,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif len(s) == 86:
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
elif len(s) == 85:
- return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
+ return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
elif len(s) == 84:
return s[81:36:-1] + s[0] + s[35:2:-1]
elif len(s) == 83:
From 1f7dc42cd0d99c1349205cbb89a2306b7aa057a2 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Wed, 11 Sep 2013 11:30:10 +0200
Subject: [PATCH 057/215] release 2013.11.09
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index aba9520c5..795f4c2c7 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.10'
+__version__ = '2013.11.09'
From 22c8b525458c8203c0709a523c646b8d152f03b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 12:04:27 +0200
Subject: [PATCH 058/215] In the supported sites page, sort the extractors
 case-insensitively
---
devscripts/gh-pages/update-sites.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
index fa4bb2beb..33f242480 100755
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -14,7 +14,7 @@ def main():
template = tmplf.read()
ie_htmls = []
- for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
+ for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower()):
ie_html = '{}'.format(ie.IE_NAME)
try:
ie_html += ': {}'.format(ie.IE_DESC)
From f8e52269c1a27c28aef606f010e2c64ff9a946d3 Mon Sep 17 00:00:00 2001
From: Ismael Mejia
Date: Wed, 11 Sep 2013 15:21:09 +0200
Subject: [PATCH 059/215] [subtitles] made inheritance hierarchy flat as
requested
---
youtube_dl/extractor/dailymotion.py | 32 ++++++++++++++---------------
youtube_dl/extractor/youtube.py | 6 ++----
2 files changed, 17 insertions(+), 21 deletions(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index f7dffd4cc..c7bcf6e8e 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -18,23 +18,7 @@ from ..utils import (
)
-class DailyMotionSubtitlesIE(NoAutoSubtitlesIE):
-
- def _get_available_subtitles(self, video_id):
- request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
- try:
- sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
- return {}
- info = json.loads(sub_list)
- if (info['total'] > 0):
- sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
- return sub_lang_list
- self._downloader.report_warning(u'video doesn\'t have subtitles')
- return {}
-
-class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
+class DailymotionIE(NoAutoSubtitlesIE):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
@@ -120,6 +104,20 @@ class DailymotionIE(DailyMotionSubtitlesIE, InfoExtractor):
'thumbnail': info['thumbnail_url']
}]
+ def _get_available_subtitles(self, video_id):
+ request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
+ try:
+ sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+ return {}
+ info = json.loads(sub_list)
+ if (info['total'] > 0):
+ sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
+ return sub_lang_list
+ self._downloader.report_warning(u'video doesn\'t have subtitles')
+ return {}
+
class DailymotionPlaylistIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/'
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e71cd62ec..5945eab70 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -24,7 +24,7 @@ from ..utils import (
orderedSet,
)
-class YoutubeBaseInfoExtractor(InfoExtractor):
+class YoutubeBaseInfoExtractor(SubtitlesIE):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
@@ -131,8 +131,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return
self._confirm_age()
-class YoutubeSubtitlesIE(SubtitlesIE):
-
def _get_available_subtitles(self, video_id):
request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
try:
@@ -189,7 +187,7 @@ class YoutubeSubtitlesIE(SubtitlesIE):
self._downloader.report_warning(err_msg)
return {}
-class YoutubeIE(YoutubeSubtitlesIE, YoutubeBaseInfoExtractor):
+class YoutubeIE(YoutubeBaseInfoExtractor):
IE_DESC = u'YouTube.com'
_VALID_URL = r"""^
(
From de7f3446e0bf99a2fe7a93eb28175b16cb2cf6c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 15:48:23 +0200
Subject: [PATCH 060/215] [youtube] move subtitles methods from the base
extractor to YoutubeIE
---
youtube_dl/extractor/youtube.py | 115 ++++++++++++++++----------------
1 file changed, 58 insertions(+), 57 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 5945eab70..8102f6d24 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -24,7 +24,7 @@ from ..utils import (
orderedSet,
)
-class YoutubeBaseInfoExtractor(SubtitlesIE):
+class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
@@ -131,63 +131,8 @@ class YoutubeBaseInfoExtractor(SubtitlesIE):
return
self._confirm_age()
- def _get_available_subtitles(self, video_id):
- request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
- try:
- sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
- return {}
- lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
- sub_lang_list = {}
- for l in lang_list:
- lang = l[1]
- params = compat_urllib_parse.urlencode({
- 'lang': lang,
- 'v': video_id,
- 'fmt': self._downloader.params.get('subtitlesformat'),
- })
- url = u'http://www.youtube.com/api/timedtext?' + params
- sub_lang_list[lang] = url
- if not sub_lang_list:
- self._downloader.report_warning(u'video doesn\'t have subtitles')
- return {}
- return sub_lang_list
-
- def _request_automatic_caption(self, video_id, webpage):
- """We need the webpage for getting the captions url, pass it as an
- argument to speed up the process."""
- sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
- sub_format = self._downloader.params.get('subtitlesformat')
- self.to_screen(u'%s: Looking for automatic captions' % video_id)
- mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
- err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
- if mobj is None:
- self._downloader.report_warning(err_msg)
- return {}
- player_config = json.loads(mobj.group(1))
- try:
- args = player_config[u'args']
- caption_url = args[u'ttsurl']
- timestamp = args[u'timestamp']
- params = compat_urllib_parse.urlencode({
- 'lang': 'en',
- 'tlang': sub_lang,
- 'fmt': sub_format,
- 'ts': timestamp,
- 'kind': 'asr',
- })
- subtitles_url = caption_url + '&' + params
- sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
- return {sub_lang: sub}
- # An extractor error can be raise by the download process if there are
- # no automatic captions but there are subtitles
- except (KeyError, ExtractorError):
- self._downloader.report_warning(err_msg)
- return {}
-
-class YoutubeIE(YoutubeBaseInfoExtractor):
+class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
IE_DESC = u'YouTube.com'
_VALID_URL = r"""^
(
@@ -508,6 +453,62 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Fallback to the other algortihms
return self._decrypt_signature(s)
+ def _get_available_subtitles(self, video_id):
+ request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+ try:
+ sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
+ return {}
+ lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
+
+ sub_lang_list = {}
+ for l in lang_list:
+ lang = l[1]
+ params = compat_urllib_parse.urlencode({
+ 'lang': lang,
+ 'v': video_id,
+ 'fmt': self._downloader.params.get('subtitlesformat'),
+ })
+ url = u'http://www.youtube.com/api/timedtext?' + params
+ sub_lang_list[lang] = url
+ if not sub_lang_list:
+ self._downloader.report_warning(u'video doesn\'t have subtitles')
+ return {}
+ return sub_lang_list
+
+ def _request_automatic_caption(self, video_id, webpage):
+ """We need the webpage for getting the captions url, pass it as an
+ argument to speed up the process."""
+ sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
+ sub_format = self._downloader.params.get('subtitlesformat')
+ self.to_screen(u'%s: Looking for automatic captions' % video_id)
+ mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
+ err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+ if mobj is None:
+ self._downloader.report_warning(err_msg)
+ return {}
+ player_config = json.loads(mobj.group(1))
+ try:
+ args = player_config[u'args']
+ caption_url = args[u'ttsurl']
+ timestamp = args[u'timestamp']
+ params = compat_urllib_parse.urlencode({
+ 'lang': 'en',
+ 'tlang': sub_lang,
+ 'fmt': sub_format,
+ 'ts': timestamp,
+ 'kind': 'asr',
+ })
+ subtitles_url = caption_url + '&' + params
+ sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
+ return {sub_lang: sub}
+ # An extractor error can be raise by the download process if there are
+ # no automatic captions but there are subtitles
+ except (KeyError, ExtractorError):
+ self._downloader.report_warning(err_msg)
+ return {}
+
def _print_formats(self, formats):
print('Available formats:')
for x in formats:
From 54d39d8b2f7a9fe148a24dd2785108b7d3823d9d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 15:51:04 +0200
Subject: [PATCH 061/215] [subtitles] rename SubtitlesIE to
 SubtitlesInfoExtractor
Otherwise it can be automatically detected as an IE ready for use.
---
youtube_dl/extractor/dailymotion.py | 4 ++--
youtube_dl/extractor/subtitles.py | 4 ++--
youtube_dl/extractor/youtube.py | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index c7bcf6e8e..d73023b9e 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -4,7 +4,7 @@ import itertools
import socket
from .common import InfoExtractor
-from .subtitles import NoAutoSubtitlesIE
+from .subtitles import NoAutoSubtitlesInfoExtractor
from ..utils import (
compat_http_client,
@@ -18,7 +18,7 @@ from ..utils import (
)
-class DailymotionIE(NoAutoSubtitlesIE):
+class DailymotionIE(NoAutoSubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index c10cdf266..8953d6789 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -10,7 +10,7 @@ from ..utils import (
)
-class SubtitlesIE(InfoExtractor):
+class SubtitlesInfoExtractor(InfoExtractor):
def _list_available_subtitles(self, video_id):
""" outputs the available subtitles for the video """
@@ -72,7 +72,7 @@ class SubtitlesIE(InfoExtractor):
pass
-class NoAutoSubtitlesIE(SubtitlesIE):
+class NoAutoSubtitlesInfoExtractor(SubtitlesInfoExtractor):
""" A subtitle class for the servers that don't support auto-captions"""
def _request_automatic_caption(self, video_id, webpage):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 8102f6d24..0476f113e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -7,7 +7,7 @@ import socket
import itertools
from .common import InfoExtractor, SearchInfoExtractor
-from .subtitles import SubtitlesIE
+from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_http_client,
compat_parse_qs,
From d82134c3395c0912157c7ccae9f21d4b3375910b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 16:05:49 +0200
Subject: [PATCH 062/215] [subtitles] Simplify the extraction of subtitles in
subclasses and remove NoAutoSubtitlesInfoExtractor
Subclasses just need to call the method extract_subtitles, which will call _extract_subtitles and _request_automatic_caption
Now the default implementation of _request_automatic_caption returns {}.
---
youtube_dl/extractor/dailymotion.py | 13 +++--------
youtube_dl/extractor/subtitles.py | 34 +++++++++++++++++++----------
youtube_dl/extractor/youtube.py | 7 +-----
3 files changed, 27 insertions(+), 27 deletions(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index d73023b9e..abd6a36ee 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -4,7 +4,7 @@ import itertools
import socket
from .common import InfoExtractor
-from .subtitles import NoAutoSubtitlesInfoExtractor
+from .subtitles import SubtitlesInfoExtractor
from ..utils import (
compat_http_client,
@@ -18,7 +18,7 @@ from ..utils import (
)
-class DailymotionIE(NoAutoSubtitlesInfoExtractor):
+class DailymotionIE(SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
@@ -81,14 +81,7 @@ class DailymotionIE(NoAutoSubtitlesInfoExtractor):
video_url = info[max_quality]
# subtitles
- video_subtitles = None
- video_webpage = None
-
- if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
- video_subtitles = self._extract_subtitles(video_id)
- elif self._downloader.params.get('writeautomaticsub', False):
- video_subtitles = self._request_automatic_caption(video_id, video_webpage)
-
+ video_subtitles = self.extract_subtitles(video_id)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id)
return
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index 8953d6789..5ae8b3b16 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -62,19 +62,31 @@ class SubtitlesInfoExtractor(InfoExtractor):
return sub
def _get_available_subtitles(self, video_id):
- """ returns {sub_lang: url} or {} if not available """
- """ Must be redefined by the subclasses """
+ """
+ returns {sub_lang: url} or {} if not available
+ Must be redefined by the subclasses
+ """
pass
def _request_automatic_caption(self, video_id, webpage):
- """ returns {sub_lang: sub} or {} if not available """
- """ Must be redefined by the subclasses """
- pass
-
-
-class NoAutoSubtitlesInfoExtractor(SubtitlesInfoExtractor):
- """ A subtitle class for the servers that don't support auto-captions"""
-
- def _request_automatic_caption(self, video_id, webpage):
+ """
+ returns {sub_lang: sub} or {} if not available
+ Must be redefined by the subclasses that support automatic captions,
+ otherwise it will return {}
+ """
self._downloader.report_warning(u'Automatic Captions not supported by this server')
return {}
+
+ def extract_subtitles(self, video_id, video_webpage=None):
+ """
+ Extract the subtitles and/or the automatic captions if requested.
+ Returns None or a dictionary in the format {sub_lang: sub}
+ """
+ video_subtitles = None
+ if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+ video_subtitles = self._extract_subtitles(video_id)
+ elif self._downloader.params.get('writeautomaticsub', False):
+ video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+ return video_subtitles
+
+
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0476f113e..3bba45b79 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -707,12 +707,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_description = u''
# subtitles
- video_subtitles = None
-
- if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
- video_subtitles = self._extract_subtitles(video_id)
- elif self._downloader.params.get('writeautomaticsub', False):
- video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+ video_subtitles = self.extract_subtitles(video_id, video_webpage)
if self._downloader.params.get('listsubtitles', False):
self._list_available_subtitles(video_id)
From 7fad1c6328b02ba9f23d37f374a05255abfe38a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 16:24:47 +0200
Subject: [PATCH 063/215] [subtitles] Use self._download_webpage for extracting
the subtitles
It raises ExtractorError for the same exceptions we have to catch.
---
youtube_dl/extractor/dailymotion.py | 10 ++++------
youtube_dl/extractor/subtitles.py | 12 +++---------
youtube_dl/extractor/youtube.py | 7 ++++---
3 files changed, 11 insertions(+), 18 deletions(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index abd6a36ee..360113f9c 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -1,14 +1,11 @@
import re
import json
import itertools
-import socket
from .common import InfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
- compat_http_client,
- compat_urllib_error,
compat_urllib_request,
compat_str,
get_element_by_attribute,
@@ -98,10 +95,11 @@ class DailymotionIE(SubtitlesInfoExtractor):
}]
def _get_available_subtitles(self, video_id):
- request = compat_urllib_request.Request('https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id)
try:
- sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ sub_list = self._download_webpage(
+ 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
+ video_id, note=False)
+ except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
return {}
info = json.loads(sub_list)
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index 5ae8b3b16..9a3c54b65 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -1,12 +1,8 @@
-import socket
-
from .common import InfoExtractor
from ..utils import (
- compat_http_client,
- compat_urllib_error,
- compat_urllib_request,
compat_str,
+ ExtractorError,
)
@@ -52,8 +48,8 @@ class SubtitlesInfoExtractor(InfoExtractor):
def _request_subtitle_url(self, sub_lang, url):
""" makes the http request for the subtitle """
try:
- sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ sub = self._download_webpage(url, None, note=False)
+ except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
return
if not sub:
@@ -88,5 +84,3 @@ class SubtitlesInfoExtractor(InfoExtractor):
elif self._downloader.params.get('writeautomaticsub', False):
video_subtitles = self._request_automatic_caption(video_id, video_webpage)
return video_subtitles
-
-
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3bba45b79..d06cc49c4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -454,10 +454,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return self._decrypt_signature(s)
def _get_available_subtitles(self, video_id):
- request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
try:
- sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ sub_list = self._download_webpage(
+ 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
+ video_id, note=False)
+ except ExtractorError as err:
self._downloader.report_warning(u'unable to download video subtitles: %s' % compat_str(err))
return {}
lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
From 6a2449df3b6604377a1ff516cbf4e25dff1d5f35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 17:36:23 +0200
Subject: [PATCH 064/215] [howcast] Do not download from
http://www.howcast.com/videos/{video_id}
It takes too much time to follow the redirection.
---
youtube_dl/extractor/howcast.py | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/howcast.py b/youtube_dl/extractor/howcast.py
index 6104c4b5e..46954337f 100644
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dl/extractor/howcast.py
@@ -19,8 +19,7 @@ class HowcastIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- webpage_url = 'http://www.howcast.com/videos/' + video_id
- webpage = self._download_webpage(webpage_url, video_id)
+ webpage = self._download_webpage(url, video_id)
self.report_extraction(video_id)
From ac4f319ba10d7d7d06079121cb19cddbf933824e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 17:58:51 +0200
Subject: [PATCH 065/215] Credit @iemejia
---
youtube_dl/__init__.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 0083f2e99..696e54f49 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -29,6 +29,7 @@ __authors__ = (
'Albert Kim',
'Pierre Rudloff',
'Huarong Huo',
+ 'Ismael Mejía',
)
__license__ = 'Public Domain'
From 055e6f36577497d807d4f474db2489f2e0ef1d4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 19:02:01 +0200
Subject: [PATCH 066/215] [youtube] Support automatic captions with original
language different from English (fixes #1225) and download in multiple
languages.
---
youtube_dl/extractor/subtitles.py | 51 +++++++++++++------------------
youtube_dl/extractor/youtube.py | 37 ++++++++++++++--------
2 files changed, 47 insertions(+), 41 deletions(-)
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index 9a3c54b65..a6780f176 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -15,28 +15,33 @@ class SubtitlesInfoExtractor(InfoExtractor):
self.to_screen(u'%s: Available subtitles for video: %s' %
(video_id, sub_lang))
- def _extract_subtitles(self, video_id):
+ def extract_subtitles(self, video_id, video_webpage=None):
""" returns {sub_lang: sub} or {} if subtitles not found """
- available_subs_list = self._get_available_subtitles(video_id)
+ if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+ available_subs_list = self._get_available_subtitles(video_id)
+ elif self._downloader.params.get('writeautomaticsub', False):
+ available_subs_list = self._get_available_automatic_caption(video_id, video_webpage)
+ else:
+ return None
+
if not available_subs_list: # error, it didn't get the available subtitles
return {}
if self._downloader.params.get('allsubtitles', False):
sub_lang_list = available_subs_list
else:
- if self._downloader.params.get('writesubtitles', False):
- if self._downloader.params.get('subtitleslangs', False):
- requested_langs = self._downloader.params.get('subtitleslangs')
- elif 'en' in available_subs_list:
- requested_langs = ['en']
- else:
- requested_langs = [list(available_subs_list.keys())[0]]
+ if self._downloader.params.get('subtitleslangs', False):
+ requested_langs = self._downloader.params.get('subtitleslangs')
+ elif 'en' in available_subs_list:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs_list.keys())[0]]
- sub_lang_list = {}
- for sub_lang in requested_langs:
- if not sub_lang in available_subs_list:
- self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
- continue
- sub_lang_list[sub_lang] = available_subs_list[sub_lang]
+ sub_lang_list = {}
+ for sub_lang in requested_langs:
+ if not sub_lang in available_subs_list:
+ self._downloader.report_warning(u'no closed captions found in the specified language "%s"' % sub_lang)
+ continue
+ sub_lang_list[sub_lang] = available_subs_list[sub_lang]
subtitles = {}
for sub_lang, url in sub_lang_list.items():
@@ -64,23 +69,11 @@ class SubtitlesInfoExtractor(InfoExtractor):
"""
pass
- def _request_automatic_caption(self, video_id, webpage):
+ def _get_available_automatic_caption(self, video_id, webpage):
"""
- returns {sub_lang: sub} or {} if not available
+ returns {sub_lang: url} or {} if not available
Must be redefined by the subclasses that support automatic captions,
otherwise it will return {}
"""
self._downloader.report_warning(u'Automatic Captions not supported by this server')
return {}
-
- def extract_subtitles(self, video_id, video_webpage=None):
- """
- Extract the subtitles and/or the automatic captions if requested.
- Returns None or a dictionary in the format {sub_lang: sub}
- """
- video_subtitles = None
- if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
- video_subtitles = self._extract_subtitles(video_id)
- elif self._downloader.params.get('writeautomaticsub', False):
- video_subtitles = self._request_automatic_caption(video_id, video_webpage)
- return video_subtitles
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index d06cc49c4..46f977ce7 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -5,6 +5,7 @@ import netrc
import re
import socket
import itertools
+import xml.etree.ElementTree
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
@@ -478,14 +479,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return {}
return sub_lang_list
- def _request_automatic_caption(self, video_id, webpage):
+ def _get_available_automatic_caption(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an
argument to speed up the process."""
- sub_lang = (self._downloader.params.get('subtitleslangs') or ['en'])[0]
sub_format = self._downloader.params.get('subtitlesformat')
self.to_screen(u'%s: Looking for automatic captions' % video_id)
mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
- err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+ err_msg = u'Couldn\'t find automatic captions for %s' % video_id
if mobj is None:
self._downloader.report_warning(err_msg)
return {}
@@ -494,16 +494,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
args = player_config[u'args']
caption_url = args[u'ttsurl']
timestamp = args[u'timestamp']
- params = compat_urllib_parse.urlencode({
- 'lang': 'en',
- 'tlang': sub_lang,
- 'fmt': sub_format,
- 'ts': timestamp,
- 'kind': 'asr',
+ # We get the available subtitles
+ list_params = compat_urllib_parse.urlencode({
+ 'type': 'list',
+ 'tlangs': 1,
+ 'asrs': 1,
})
- subtitles_url = caption_url + '&' + params
- sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
- return {sub_lang: sub}
+ list_url = caption_url + '&' + list_params
+ list_page = self._download_webpage(list_url, video_id)
+ caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+ original_lang = caption_list.find('track').attrib['lang_code']
+
+ sub_lang_list = {}
+ for lang_node in caption_list.findall('target'):
+ sub_lang = lang_node.attrib['lang_code']
+ params = compat_urllib_parse.urlencode({
+ 'lang': original_lang,
+ 'tlang': sub_lang,
+ 'fmt': sub_format,
+ 'ts': timestamp,
+ 'kind': 'asr',
+ })
+ sub_lang_list[sub_lang] = caption_url + '&' + params
+ return sub_lang_list
# An extractor error can be raise by the download process if there are
# no automatic captions but there are subtitles
except (KeyError, ExtractorError):
From d665f8d3cbb3ff507a76421e66fa96f55a830c08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 19:17:30 +0200
Subject: [PATCH 067/215] [subtitles] Also list the available automatic
captions languages with '--list-sub'
---
youtube_dl/extractor/subtitles.py | 6 +++++-
youtube_dl/extractor/youtube.py | 2 +-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index a6780f176..a95629765 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -8,12 +8,16 @@ from ..utils import (
class SubtitlesInfoExtractor(InfoExtractor):
- def _list_available_subtitles(self, video_id):
+ def _list_available_subtitles(self, video_id, webpage=None):
""" outputs the available subtitles for the video """
sub_lang_list = self._get_available_subtitles(video_id)
+ auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
sub_lang = ",".join(list(sub_lang_list.keys()))
self.to_screen(u'%s: Available subtitles for video: %s' %
(video_id, sub_lang))
+ auto_lang = ",".join(auto_captions_list.keys())
+ self.to_screen(u'%s: Available automatic captions for video: %s' %
+ (video_id, auto_lang))
def extract_subtitles(self, video_id, video_webpage=None):
""" returns {sub_lang: sub} or {} if subtitles not found """
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 46f977ce7..331fd143a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -724,7 +724,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_subtitles = self.extract_subtitles(video_id, video_webpage)
if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id)
+ self._list_available_subtitles(video_id, video_webpage)
return
if 'length_seconds' not in video_info:
From e3dc22ca3a1790da7a70753ac2c62709d1103175 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 11 Sep 2013 19:24:56 +0200
Subject: [PATCH 068/215] [youtube] Fix detection of videos with automatic
captions
---
youtube_dl/extractor/youtube.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 331fd143a..2e0d70eaf 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -503,7 +503,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
list_url = caption_url + '&' + list_params
list_page = self._download_webpage(list_url, video_id)
caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
- original_lang = caption_list.find('track').attrib['lang_code']
+ original_lang_node = caption_list.find('track')
+ if original_lang_node.attrib.get('kind') != 'asr' :
+ self._downloader.report_warning(u'Video doesn\'t have automatic captions')
+ return {}
+ original_lang = original_lang_node.attrib['lang_code']
sub_lang_list = {}
for lang_node in caption_list.findall('target'):
From f1d20fa39f264508aa7219ccc4bc64f59b970f04 Mon Sep 17 00:00:00 2001
From: Johny Mo Swag
Date: Wed, 11 Sep 2013 14:50:38 -0700
Subject: [PATCH 069/215] added kickstarter IE
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/kickstarter.py | 43 +++++++++++++++++++++++++++++
2 files changed, 44 insertions(+)
create mode 100644 youtube_dl/extractor/kickstarter.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index fbe0b8cb7..5711b6bba 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -48,6 +48,7 @@ from .jeuxvideo import JeuxVideoIE
from .jukebox import JukeboxIE
from .justintv import JustinTVIE
from .kankan import KankanIE
+from .kickstarter import KickStarterIE
from .keek import KeekIE
from .liveleak import LiveLeakIE
from .livestream import LivestreamIE
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py
new file mode 100644
index 000000000..7f6f2b064
--- /dev/null
+++ b/youtube_dl/extractor/kickstarter.py
@@ -0,0 +1,43 @@
+import re
+
+from .common import InfoExtractor
+
+
+class KickStarterIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P.*)/.*\?'
+ _TEST = {
+ "url": "https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location",
+ "file": "1404461844.mp4",
+ "md5": "c81addca81327ffa66c642b5d8b08cab",
+ "info_dict": {
+ "title": u"Intersection: The Story of Josh Grant by Kyle Cowling \u2014 Kickstarter"
+ }
+ }
+
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ video_id = m.group('id')
+
+ webpage_src = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(r'data-video="(.*?)">',
+ webpage_src, u'video URL')
+
+ if 'mp4' in video_url:
+ ext = 'mp4'
+ else:
+ ext = 'flv'
+
+ video_title = self._html_search_regex(r"(.*)?",
+ webpage_src, u'title')
+
+
+ results = [{
+ 'id': video_id,
+ 'url' : video_url,
+ 'title' : video_title,
+ 'ext' : ext,
+ }]
+
+ return results
\ No newline at end of file
From 6bc520c20706df9e5b32c4d2bccc2d46fb005fc5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 12 Sep 2013 11:15:25 +0200
Subject: [PATCH 070/215] Check for both automatic captions and subtitles with
options `--write-sub` and `--write-auto-sub` (fixes #1224)
---
youtube_dl/extractor/subtitles.py | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index a95629765..97215f289 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -7,6 +7,11 @@ from ..utils import (
class SubtitlesInfoExtractor(InfoExtractor):
+ @property
+ def _have_to_download_any_subtitles(self):
+ return any([self._downloader.params.get('writesubtitles', False),
+ self._downloader.params.get('writeautomaticsub'),
+ self._downloader.params.get('allsubtitles', False)])
def _list_available_subtitles(self, video_id, webpage=None):
""" outputs the available subtitles for the video """
@@ -20,13 +25,17 @@ class SubtitlesInfoExtractor(InfoExtractor):
(video_id, auto_lang))
def extract_subtitles(self, video_id, video_webpage=None):
- """ returns {sub_lang: sub} or {} if subtitles not found """
- if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
- available_subs_list = self._get_available_subtitles(video_id)
- elif self._downloader.params.get('writeautomaticsub', False):
- available_subs_list = self._get_available_automatic_caption(video_id, video_webpage)
- else:
+ """
+ returns {sub_lang: sub} ,{} if subtitles not found or None if the
+ subtitles aren't requested.
+ """
+ if not self._have_to_download_any_subtitles:
return None
+ available_subs_list = {}
+ if self._downloader.params.get('writeautomaticsub', False):
+ available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
+ if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+ available_subs_list.update(self._get_available_subtitles(video_id))
if not available_subs_list: # error, it didn't get the available subtitles
return {}
From 07ac9e2cc2c269d7eb015d6de5bc6e66981d4e31 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Thu, 12 Sep 2013 11:26:44 +0200
Subject: [PATCH 071/215] release 2013.09.12
---
README.md | 6 ++----
youtube_dl/version.py | 2 +-
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 2776cb3eb..400e6cd48 100644
--- a/README.md
+++ b/README.md
@@ -123,10 +123,8 @@ which means you can modify it, redistribute it or use it however you like.
only)
## Subtitle Options:
- --write-sub write subtitle file (currently youtube only)
- --write-auto-sub write automatic subtitle file (currently youtube
- only)
- --only-sub [deprecated] alias of --skip-download
+ --write-sub write subtitle file
+ --write-auto-sub write automatic subtitle file (youtube only)
--all-subs downloads all the available subtitles of the
video
--list-subs lists all available subtitles for the video
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 795f4c2c7..3b2505c77 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.11.09'
+__version__ = '2013.09.12'
From c247d87ef3a7f03dfcc28e3fc23dee9ec34835d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 12 Sep 2013 11:31:27 +0200
Subject: [PATCH 072/215] [funnyordie] fix video url extraction
---
youtube_dl/extractor/funnyordie.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index 4508f0dfa..f3d86a711 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -21,7 +21,7 @@ class FunnyOrDieIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(r'type: "video/mp4", src: "(.*?)"',
+ video_url = self._search_regex(r'type="video/mp4" src="(.*?)"',
webpage, u'video URL', flags=re.DOTALL)
info = {
From bfd5c93af9f9eee938c628f19c997f999f21c74e Mon Sep 17 00:00:00 2001
From: tewe
Date: Thu, 12 Sep 2013 12:30:14 +0200
Subject: [PATCH 073/215] Add Ustream channel support
---
youtube_dl/extractor/__init__.py | 2 +-
youtube_dl/extractor/ustream.py | 70 ++++++++++++++++++++++++++++++++
2 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 26cf24935..a7cddef73 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -96,7 +96,7 @@ from .tudou import TudouIE
from .tumblr import TumblrIE
from .tutv import TutvIE
from .unistra import UnistraIE
-from .ustream import UstreamIE
+from .ustream import UstreamIE, UstreamChannelIE
from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index 5f423870a..16cdcc765 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -1,4 +1,7 @@
+from HTMLParser import HTMLParser
+import json
import re
+from urlparse import urljoin
from .common import InfoExtractor
@@ -43,3 +46,70 @@ class UstreamIE(InfoExtractor):
'thumbnail': thumbnail,
}
return info
+
+# More robust than regular expressions
+
+class ChannelParser(HTMLParser):
+ """
+
+ """
+ channel_id = None
+
+ def handle_starttag(self, tag, attrs):
+ if tag != 'meta':
+ return
+ values = dict(attrs)
+ if values.get('name') != 'ustream:channel_id':
+ return
+ value = values.get('content', '')
+ if value.isdigit():
+ self.channel_id = value
+
+class SocialstreamParser(HTMLParser):
+ """
+
+ """
+ def __init__(self):
+ HTMLParser.__init__(self)
+ self.content_ids = []
+
+ def handle_starttag(self, tag, attrs):
+ if tag != 'li':
+ return
+ for (attr, value) in attrs:
+ if attr == 'data-content-id' and value.isdigit():
+ self.content_ids.append(value)
+
+class UstreamChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P.+)'
+ IE_NAME = u'ustream:channel'
+
+ def _real_extract(self, url):
+ m = re.match(self._VALID_URL, url)
+ slug = m.group('slug')
+ # Slugs can be non-ascii, but youtube-dl can't handle non-ascii command lines,
+ # so if we got this far it's probably percent encoded and we needn't worry.
+
+ p = ChannelParser()
+ p.feed(self._download_webpage(url, slug))
+ p.close()
+ channel_id = p.channel_id
+
+ p = SocialstreamParser()
+ BASE = 'http://www.ustream.tv'
+ next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
+ while next_url:
+ reply = json.loads(self._download_webpage(urljoin(BASE, next_url), channel_id))
+ p.feed(reply['data'])
+ next_url = reply['nextUrl']
+ p.close()
+ video_ids = p.content_ids
+
+ # From YoutubeChannelIE
+
+ self._downloader.to_screen(u'[ustream] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
+
+ urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
+ url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
+ return [self.playlist_result(url_entries, channel_id)]
From 83de7942237b8294df47f318133fc5d7c260d496 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Thu, 12 Sep 2013 16:30:43 +0200
Subject: [PATCH 074/215] Add original buildserver from @fraca7
---
devscripts/buildserver.py | 272 ++++++++++++++++++++++++++++++++++++++
1 file changed, 272 insertions(+)
create mode 100644 devscripts/buildserver.py
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
new file mode 100644
index 000000000..4fe80edce
--- /dev/null
+++ b/devscripts/buildserver.py
@@ -0,0 +1,272 @@
+#!/usr/bin/python
+
+## This is free and unencumbered software released into the public domain.
+
+## Anyone is free to copy, modify, publish, use, compile, sell, or
+## distribute this software, either in source code form or as a compiled
+## binary, for any purpose, commercial or non-commercial, and by any
+## means.
+
+## In jurisdictions that recognize copyright laws, the author or authors
+## of this software dedicate any and all copyright interest in the
+## software to the public domain. We make this dedication for the benefit
+## of the public at large and to the detriment of our heirs and
+## successors. We intend this dedication to be an overt act of
+## relinquishment in perpetuity of all present and future rights to this
+## software under copyright law.
+
+## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+## EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+## MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+## IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+## OTHER DEALINGS IN THE SOFTWARE.
+
+## For more information, please refer to
+
+from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
+from SocketServer import ThreadingMixIn
+import getopt, threading, sys, urlparse, _winreg, os, subprocess, shutil, tempfile
+
+
+class BuildHTTPServer(ThreadingMixIn, HTTPServer):
+ allow_reuse_address = True
+
+
+def usage():
+ print 'Usage: %s [options]'
+ print 'Options:'
+ print
+ print ' -h, --help Display this help'
+ print ' -i, --install Launch at session startup'
+ print ' -u, --uninstall Do not launch at session startup'
+ print ' -b, --bind Bind to host:port (default localhost:8142)'
+ sys.exit(0)
+
+
+def main(argv):
+ opts, args = getopt.getopt(argv, 'hb:iu', ['help', 'bind=', 'install', 'uninstall'])
+ host = 'localhost'
+ port = 8142
+
+ for opt, val in opts:
+ if opt in ['-h', '--help']:
+ usage()
+ elif opt in ['-b', '--bind']:
+ try:
+ host, port = val.split(':')
+ except ValueError:
+ host = val
+ else:
+ port = int(port)
+ elif opt in ['-i', '--install']:
+ key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Run', 0, _winreg.KEY_WRITE)
+ try:
+ _winreg.SetValueEx(key, 'Youtube-dl builder', 0, _winreg.REG_SZ,
+ '"%s" "%s" -b %s:%d' % (sys.executable, os.path.normpath(os.path.abspath(sys.argv[0])),
+ host, port))
+ finally:
+ _winreg.CloseKey(key)
+ print 'Installed.'
+ sys.exit(0)
+ elif opt in ['-u', '--uninstall']:
+ key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Run', 0, _winreg.KEY_WRITE)
+ try:
+ _winreg.DeleteValue(key, 'Youtube-dl builder')
+ finally:
+ _winreg.CloseKey(key)
+ print 'Uninstalled.'
+ sys.exit(0)
+
+ print 'Listening on %s:%d' % (host, port)
+ srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
+ thr = threading.Thread(target=srv.serve_forever)
+ thr.start()
+ raw_input('Hit to stop...\n')
+ srv.shutdown()
+ thr.join()
+
+
+def rmtree(path):
+ for name in os.listdir(path):
+ fname = os.path.join(path, name)
+ if os.path.isdir(fname):
+ rmtree(fname)
+ else:
+ os.chmod(fname, 0666)
+ os.remove(fname)
+ os.rmdir(path)
+
+#==============================================================================
+
+class BuildError(Exception):
+ def __init__(self, output, code=500):
+ self.output = output
+ self.code = code
+
+ def __str__(self):
+ return self.output
+
+
+class HTTPError(BuildError):
+ pass
+
+
+class PythonBuilder(object):
+ def __init__(self, **kwargs):
+ pythonVersion = kwargs.pop('python', '2.7')
+ try:
+ key = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, r'SOFTWARE\Python\PythonCore\%s\InstallPath' % pythonVersion)
+ try:
+ self.pythonPath, _ = _winreg.QueryValueEx(key, '')
+ finally:
+ _winreg.CloseKey(key)
+ except Exception:
+ raise BuildError('No such Python version: %s' % pythonVersion)
+
+ super(PythonBuilder, self).__init__(**kwargs)
+
+
+class GITInfoBuilder(object):
+ def __init__(self, **kwargs):
+ try:
+ self.user, self.repoName = kwargs['path'][:2]
+ self.rev = kwargs.pop('rev')
+ except ValueError:
+ raise BuildError('Invalid path')
+ except KeyError as e:
+ raise BuildError('Missing mandatory parameter "%s"' % e.args[0])
+
+ path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
+ if not os.path.exists(path):
+ os.makedirs(path)
+ self.basePath = tempfile.mkdtemp(dir=path)
+ self.buildPath = os.path.join(self.basePath, 'build')
+
+ super(GITInfoBuilder, self).__init__(**kwargs)
+
+
+class GITBuilder(GITInfoBuilder):
+ def build(self):
+ try:
+ subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
+ subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
+ except subprocess.CalledProcessError as e:
+ raise BuildError(e.output)
+
+ super(GITBuilder, self).build()
+
+
+class YoutubeDLBuilder(object):
+ authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile']
+
+ def __init__(self, **kwargs):
+ if self.repoName != 'youtube-dl':
+ raise BuildError('Invalid repository "%s"' % self.repoName)
+ if self.user not in self.authorizedUsers:
+ raise HTTPError('Unauthorized user "%s"' % self.user, 401)
+
+ super(YoutubeDLBuilder, self).__init__(**kwargs)
+
+ def build(self):
+ try:
+ subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
+ cwd=self.buildPath)
+ except subprocess.CalledProcessError as e:
+ raise BuildError(e.output)
+
+ super(YoutubeDLBuilder, self).build()
+
+
+class DownloadBuilder(object):
+ def __init__(self, **kwargs):
+ self.handler = kwargs.pop('handler')
+ self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
+ self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
+ if not self.srcPath.startswith(self.buildPath):
+ raise HTTPError(self.srcPath, 401)
+
+ super(DownloadBuilder, self).__init__(**kwargs)
+
+ def build(self):
+ if not os.path.exists(self.srcPath):
+ raise HTTPError('No such file', 404)
+ if os.path.isdir(self.srcPath):
+ raise HTTPError('Is a directory: %s' % self.srcPath, 401)
+
+ self.handler.send_response(200)
+ self.handler.send_header('Content-Type', 'application/octet-stream')
+ self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
+ self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
+ self.handler.end_headers()
+
+ with open(self.srcPath, 'rb') as src:
+ shutil.copyfileobj(src, self.handler.wfile)
+
+ super(DownloadBuilder, self).build()
+
+
+class CleanupTempDir(object):
+ def build(self):
+ try:
+ rmtree(self.basePath)
+ except Exception as e:
+ print 'WARNING deleting "%s": %s' % (self.basePath, e)
+
+ super(CleanupTempDir, self).build()
+
+
+class Null(object):
+ def __init__(self, **kwargs):
+ pass
+
+ def start(self):
+ pass
+
+ def close(self):
+ pass
+
+ def build(self):
+ pass
+
+
+class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
+ pass
+
+
+class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
+ actionDict = { 'build': Builder, 'download': Builder } # They're the same, no more caching.
+
+ def do_GET(self):
+ path = urlparse.urlparse(self.path)
+ paramDict = dict([(key, value[0]) for key, value in urlparse.parse_qs(path.query).items()])
+ action, _, path = path.path.strip('/').partition('/')
+ if path:
+ path = path.split('/')
+ if action in self.actionDict:
+ try:
+ builder = self.actionDict[action](path=path, handler=self, **paramDict)
+ builder.start()
+ try:
+ builder.build()
+ finally:
+ builder.close()
+ except BuildError as e:
+ self.send_response(e.code)
+ msg = unicode(e).encode('UTF-8')
+ self.send_header('Content-Type', 'text/plain; charset=UTF-8')
+ self.send_header('Content-Length', len(msg))
+ self.end_headers()
+ self.wfile.write(msg)
+ except HTTPError as e:
+ self.send_response(e.code, str(e))
+ else:
+ self.send_response(500, 'Unknown build method "%s"' % action)
+ else:
+ self.send_response(500, 'Malformed URL')
+
+#==============================================================================
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
From 353ba14060528d981213c66131bc770f478935de Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Thu, 12 Sep 2013 16:34:24 +0200
Subject: [PATCH 075/215] [buildserver] Rely on repository license
---
devscripts/buildserver.py | 25 -------------------------
1 file changed, 25 deletions(-)
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
index 4fe80edce..edc437dee 100644
--- a/devscripts/buildserver.py
+++ b/devscripts/buildserver.py
@@ -1,30 +1,5 @@
#!/usr/bin/python
-## This is free and unencumbered software released into the public domain.
-
-## Anyone is free to copy, modify, publish, use, compile, sell, or
-## distribute this software, either in source code form or as a compiled
-## binary, for any purpose, commercial or non-commercial, and by any
-## means.
-
-## In jurisdictions that recognize copyright laws, the author or authors
-## of this software dedicate any and all copyright interest in the
-## software to the public domain. We make this dedication for the benefit
-## of the public at large and to the detriment of our heirs and
-## successors. We intend this dedication to be an overt act of
-## relinquishment in perpetuity of all present and future rights to this
-## software under copyright law.
-
-## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-## EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-## MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-## IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-## OTHER DEALINGS IN THE SOFTWARE.
-
-## For more information, please refer to
-
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from SocketServer import ThreadingMixIn
import getopt, threading, sys, urlparse, _winreg, os, subprocess, shutil, tempfile
From ad94a6fe446543788730f5096c7b74229702805e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 12 Sep 2013 21:56:36 +0200
Subject: [PATCH 076/215] [canalplust] accept urls that don't include the video
id (fixes #1415), extract more info and update test
---
youtube_dl/extractor/canalplus.py | 22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index 1f02519a0..1db9b24cf 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
import re
import xml.etree.ElementTree
@@ -5,24 +6,29 @@ from .common import InfoExtractor
from ..utils import unified_strdate
class CanalplusIE(InfoExtractor):
- _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P\d+)'
+ _VALID_URL = r'https?://(www\.canalplus\.fr/.*?/(?P.*)|player\.canalplus\.fr/#/(?P\d+))'
_VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
IE_NAME = u'canalplus.fr'
_TEST = {
- u'url': u'http://www.canalplus.fr/c-divertissement/pid3351-c-le-petit-journal.html?vid=889861',
- u'file': u'889861.flv',
- u'md5': u'590a888158b5f0d6832f84001fbf3e99',
+ u'url': u'http://www.canalplus.fr/c-infos-documentaires/pid1830-c-zapping.html?vid=922470',
+ u'file': u'922470.flv',
u'info_dict': {
- u'title': u'Le Petit Journal 20/06/13 - La guerre des drone',
- u'upload_date': u'20130620',
+ u'title': u'Zapping - 26/08/13',
+ u'description': u'Le meilleur de toutes les chaînes, tous les jours.\nEmission du 26 août 2013',
+ u'upload_date': u'20130826',
+ },
+ u'params': {
+ u'skip_download': True,
},
- u'skip': u'Requires rtmpdump'
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
+ if video_id is None:
+ webpage = self._download_webpage(url, mobj.group('path'))
+ video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
info_url = self._VIDEO_INFO_TEMPLATE % video_id
info_page = self._download_webpage(info_url,video_id,
u'Downloading video info')
@@ -43,4 +49,6 @@ class CanalplusIE(InfoExtractor):
'ext': 'flv',
'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
'thumbnail': media.find('IMAGES/GRAND').text,
+ 'description': infos.find('DESCRIPTION').text,
+ 'view_count': int(infos.find('NB_VUES').text),
}
From ce85f022d2c78b3f58982b486c8c628d22158573 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 12 Sep 2013 22:04:09 +0200
Subject: [PATCH 077/215] [youtube] update algo for length 82 (fixes #1416)
---
devscripts/youtube_genalgo.py | 4 ++--
youtube_dl/extractor/youtube.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index 6e3595366..b390c7e2e 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -32,9 +32,9 @@ tests = [
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
- # 82 - vflZK4ZYR 2013/08/23
+ # 82 - vflGNjMhJ 2013/09/12
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
- "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>.<"),
+ ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
# 81 - vflLC8JvQ 2013/07/25
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
"C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2e0d70eaf..f49665925 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -434,7 +434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len(s) == 83:
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
elif len(s) == 82:
- return s[1:19] + s[0] + s[20:68] + s[19] + s[69:82]
+ return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
elif len(s) == 81:
return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9]
elif len(s) == 80:
From dd01d6558a142deb93fe7d6122ae698ecdea4f63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 12 Sep 2013 22:18:39 +0200
Subject: [PATCH 078/215] [gamespot] Update test video title
---
youtube_dl/extractor/gamespot.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py
index 7585b7061..cd3bbe65f 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dl/extractor/gamespot.py
@@ -14,7 +14,7 @@ class GameSpotIE(InfoExtractor):
u"file": u"6410818.mp4",
u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
u"info_dict": {
- u"title": u"Arma III - Community Guide: SITREP I",
+ u"title": u"Arma 3 - Community Guide: SITREP I",
u"upload_date": u"20130627",
}
}
From 71cedb3c0c6860c1cf06792a8e98940f88b34a17 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 13 Sep 2013 02:25:12 +0200
Subject: [PATCH 079/215] [buildserver] Service installation and uninstallation
---
devscripts/buildserver.py | 141 +++++++++++++++++++++++++-------------
1 file changed, 93 insertions(+), 48 deletions(-)
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
index edc437dee..45c875b23 100644
--- a/devscripts/buildserver.py
+++ b/devscripts/buildserver.py
@@ -1,64 +1,109 @@
-#!/usr/bin/python
+#!/usr/bin/python3
-from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
-from SocketServer import ThreadingMixIn
-import getopt, threading, sys, urlparse, _winreg, os, subprocess, shutil, tempfile
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from socketserver import ThreadingMixIn
+import argparse
+import ctypes
+import sys
+import threading
+import os.path
class BuildHTTPServer(ThreadingMixIn, HTTPServer):
allow_reuse_address = True
-def usage():
- print 'Usage: %s [options]'
- print 'Options:'
- print
- print ' -h, --help Display this help'
- print ' -i, --install Launch at session startup'
- print ' -u, --uninstall Do not launch at session startup'
- print ' -b, --bind Bind to host:port (default localhost:8142)'
- sys.exit(0)
+advapi32 = ctypes.windll.advapi32
+
+SC_MANAGER_ALL_ACCESS = 0xf003f
+SC_MANAGER_CREATE_SERVICE = 0x02
+SERVICE_WIN32_OWN_PROCESS = 0x10
+SERVICE_AUTO_START = 0x2
+SERVICE_ERROR_NORMAL = 0x1
+DELETE = 0x00010000
+
+
+def win_OpenSCManager():
+ res = advapi32.OpenSCManagerA(None, None, SC_MANAGER_ALL_ACCESS)
+ if not res:
+ raise Exception('Opening service manager failed - '
+ 'are you running this as administrator?')
+ return res
+
+
+def win_install_service(service_name, cmdline):
+ manager = win_OpenSCManager()
+ try:
+ h = advapi32.CreateServiceA(
+ manager, service_name, None,
+ SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
+ SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
+ cmdline, None, None, None, None, None)
+ if not h:
+ raise OSError('Service creation failed: %s' % ctypes.FormatError())
+
+ advapi32.CloseServiceHandle(h)
+ finally:
+ advapi32.CloseServiceHandle(manager)
+
+
+def win_uninstall_service(service_name):
+ manager = win_OpenSCManager()
+ try:
+ h = advapi32.OpenServiceA(manager, service_name, DELETE)
+ if not h:
+ raise OSError('Could not find service %s: %s' % (
+ service_name, ctypes.FormatError()))
+
+ try:
+ if not advapi32.DeleteService(h):
+ raise OSError('Deletion failed: %s' % ctypes.FormatError())
+ finally:
+ advapi32.CloseServiceHandle(h)
+ finally:
+ advapi32.CloseServiceHandle(manager)
+
+
+def install_service(bind):
+ fn = os.path.normpath(__file__)
+ cmdline = '"%s" "%s" -s -b "%s"' % (sys.executable, fn, bind)
+ win_install_service('youtubedl_builder', cmdline)
+
+
+def uninstall_service():
+ win_uninstall_service('youtubedl_builder')
def main(argv):
- opts, args = getopt.getopt(argv, 'hb:iu', ['help', 'bind=', 'install', 'uninstall'])
- host = 'localhost'
- port = 8142
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-i', '--install',
+ action='store_const', dest='action', const='install',
+ help='Launch at Windows startup')
+ parser.add_argument('-u', '--uninstall',
+ action='store_const', dest='action', const='uninstall',
+ help='Remove Windows service')
+ parser.add_argument('-s', '--service',
+ action='store_const', dest='action', const='servce',
+ help='Run as a Windows service')
+ parser.add_argument('-b', '--bind', metavar='',
+ action='store', default='localhost:8142',
+ help='Bind to host:port (default %default)')
+ options = parser.parse_args()
- for opt, val in opts:
- if opt in ['-h', '--help']:
- usage()
- elif opt in ['-b', '--bind']:
- try:
- host, port = val.split(':')
- except ValueError:
- host = val
- else:
- port = int(port)
- elif opt in ['-i', '--install']:
- key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Run', 0, _winreg.KEY_WRITE)
- try:
- _winreg.SetValueEx(key, 'Youtube-dl builder', 0, _winreg.REG_SZ,
- '"%s" "%s" -b %s:%d' % (sys.executable, os.path.normpath(os.path.abspath(sys.argv[0])),
- host, port))
- finally:
- _winreg.CloseKey(key)
- print 'Installed.'
- sys.exit(0)
- elif opt in ['-u', '--uninstall']:
- key = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Run', 0, _winreg.KEY_WRITE)
- try:
- _winreg.DeleteValue(key, 'Youtube-dl builder')
- finally:
- _winreg.CloseKey(key)
- print 'Uninstalled.'
- sys.exit(0)
+ if options.action == 'install':
+ return install_service(options.bind)
- print 'Listening on %s:%d' % (host, port)
+ if options.action == 'uninstall':
+ return uninstall_service()
+
+ host, port_str = options.bind.split(':')
+ port = int(port_str)
+
+ print('Listening on %s:%d' % (host, port))
srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
thr = threading.Thread(target=srv.serve_forever)
thr.start()
- raw_input('Hit to stop...\n')
+ input('Press ENTER to shut down')
srv.shutdown()
thr.join()
@@ -69,7 +114,7 @@ def rmtree(path):
if os.path.isdir(fname):
rmtree(fname)
else:
- os.chmod(fname, 0666)
+ os.chmod(fname, 0o666)
os.remove(fname)
os.rmdir(path)
@@ -187,7 +232,7 @@ class CleanupTempDir(object):
try:
rmtree(self.basePath)
except Exception as e:
- print 'WARNING deleting "%s": %s' % (self.basePath, e)
+ print('WARNING deleting "%s": %s' % (self.basePath, e))
super(CleanupTempDir, self).build()
From a921f40799d2ecb4be53b3241d2dbfc80f804d73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 13 Sep 2013 22:05:29 +0200
Subject: [PATCH 080/215] [ustream] Simplify channel extraction
The ChannelParser has been replaced by a new function in utils, get_meta_content.
Instead of the SocialstreamParser it now uses a regex.
---
test/test_utils.py | 28 +++++++++++++-----
youtube_dl/extractor/ustream.py | 49 ++++---------------------------
youtube_dl/utils.py | 52 ++++++++++++++++++++++++++++-----
3 files changed, 70 insertions(+), 59 deletions(-)
diff --git a/test/test_utils.py b/test/test_utils.py
index be1069105..ff2e9885b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -11,13 +11,16 @@ import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
#from youtube_dl.utils import htmlentity_transform
-from youtube_dl.utils import timeconvert
-from youtube_dl.utils import sanitize_filename
-from youtube_dl.utils import unescapeHTML
-from youtube_dl.utils import orderedSet
-from youtube_dl.utils import DateRange
-from youtube_dl.utils import unified_strdate
-from youtube_dl.utils import find_xpath_attr
+from youtube_dl.utils import (
+ timeconvert,
+ sanitize_filename,
+ unescapeHTML,
+ orderedSet,
+ DateRange,
+ unified_strdate,
+ find_xpath_attr,
+ get_meta_content,
+)
if sys.version_info < (3, 0):
_compat_str = lambda b: b.decode('unicode-escape')
@@ -127,5 +130,16 @@ class TestUtil(unittest.TestCase):
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
+ def test_meta_parser(self):
+ testhtml = u'''
+
+
+
+
+ '''
+ get_meta = lambda name: get_meta_content(name, testhtml)
+ self.assertEqual(get_meta('description'), u'foo & bar')
+ self.assertEqual(get_meta('author'), 'Plato')
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py
index f69b27d44..74c82587f 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dl/extractor/ustream.py
@@ -4,7 +4,7 @@ import re
from .common import InfoExtractor
from ..utils import (
compat_urlparse,
- compat_html_parser,
+ get_meta_content,
)
@@ -49,40 +49,6 @@ class UstreamIE(InfoExtractor):
}
return info
-# More robust than regular expressions
-
-class ChannelParser(compat_html_parser.HTMLParser):
- """
-
- """
- channel_id = None
-
- def handle_starttag(self, tag, attrs):
- if tag != 'meta':
- return
- values = dict(attrs)
- if values.get('name') != 'ustream:channel_id':
- return
- value = values.get('content', '')
- if value.isdigit():
- self.channel_id = value
-
-class SocialstreamParser(compat_html_parser.HTMLParser):
- """
-
- """
- def __init__(self):
- compat_html_parser.HTMLParser.__init__(self)
- self.content_ids = []
-
- def handle_starttag(self, tag, attrs):
- if tag != 'li':
- return
- for (attr, value) in attrs:
- if attr == 'data-content-id' and value.isdigit():
- self.content_ids.append(value)
-
class UstreamChannelIE(InfoExtractor):
_VALID_URL = r'https?://www\.ustream\.tv/channel/(?P.+)'
IE_NAME = u'ustream:channel'
@@ -90,21 +56,16 @@ class UstreamChannelIE(InfoExtractor):
def _real_extract(self, url):
m = re.match(self._VALID_URL, url)
slug = m.group('slug')
+ webpage = self._download_webpage(url, slug)
+ channel_id = get_meta_content('ustream:channel_id', webpage)
- p = ChannelParser()
- p.feed(self._download_webpage(url, slug))
- p.close()
- channel_id = p.channel_id
-
- p = SocialstreamParser()
BASE = 'http://www.ustream.tv'
next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id
+ video_ids = []
while next_url:
reply = json.loads(self._download_webpage(compat_urlparse.urljoin(BASE, next_url), channel_id))
- p.feed(reply['data'])
+ video_ids.extend(re.findall(r'data-content-id="(\d.*)"', reply['data']))
next_url = reply['nextUrl']
- p.close()
- video_ids = p.content_ids
urls = ['http://www.ustream.tv/recorded/' + vid for vid in video_ids]
url_entries = [self.url_result(eurl, 'Ustream') for eurl in urls]
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 201802cee..768c6207d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -249,7 +249,17 @@ def htmlentity_transform(matchobj):
return (u'&%s;' % entity)
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
-class AttrParser(compat_html_parser.HTMLParser):
+class BaseHTMLParser(compat_html_parser.HTMLParser):
+ def __init(self):
+ compat_html_parser.HTMLParser.__init__(self)
+ self.html = None
+
+ def loads(self, html):
+ self.html = html
+ self.feed(html)
+ self.close()
+
+class AttrParser(BaseHTMLParser):
"""Modified HTMLParser that isolates a tag with the specified attribute"""
def __init__(self, attribute, value):
self.attribute = attribute
@@ -257,10 +267,9 @@ class AttrParser(compat_html_parser.HTMLParser):
self.result = None
self.started = False
self.depth = {}
- self.html = None
self.watch_startpos = False
self.error_count = 0
- compat_html_parser.HTMLParser.__init__(self)
+ BaseHTMLParser.__init__(self)
def error(self, message):
if self.error_count > 10 or self.started:
@@ -269,11 +278,6 @@ class AttrParser(compat_html_parser.HTMLParser):
self.error_count += 1
self.goahead(1)
- def loads(self, html):
- self.html = html
- self.feed(html)
- self.close()
-
def handle_starttag(self, tag, attrs):
attrs = dict(attrs)
if self.started:
@@ -334,6 +338,38 @@ def get_element_by_attribute(attribute, value, html):
pass
return parser.get_result()
+class MetaParser(BaseHTMLParser):
+ """
+ Modified HTMLParser that isolates a meta tag with the specified name
+ attribute.
+ """
+ def __init__(self, name):
+ BaseHTMLParser.__init__(self)
+ self.name = name
+ self.content = None
+ self.result = None
+
+ def handle_starttag(self, tag, attrs):
+ if tag != 'meta':
+ return
+ attrs = dict(attrs)
+ if attrs.get('name') == self.name:
+ self.result = attrs.get('content')
+
+ def get_result(self):
+ return self.result
+
+def get_meta_content(name, html):
+ """
+ Return the content attribute from the meta tag with the given name attribute.
+ """
+ parser = MetaParser(name)
+ try:
+ parser.loads(html)
+ except compat_html_parser.HTMLParseError:
+ pass
+ return parser.get_result()
+
def clean_html(html):
"""Clean an HTML snippet into a readable string"""
From 9a1c32dc54fdefcd6b5e03fac1a0dd65383b6f99 Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Sat, 14 Sep 2013 05:42:00 +0200
Subject: [PATCH 081/215] XHamsterIE: Add support for new URL format
---
youtube_dl/extractor/xhamster.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index 88b8b6be0..e50069586 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -11,7 +11,7 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster"""
- _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P[0-9]+)/.*\.html'
+ _VALID_URL = r'(?:http://)?(?P(?:www\.)?xhamster\.com/movies/(?P[0-9]+)/.*\.html(?:\?.*)?)'
_TEST = {
u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
u'file': u'1509445.flv',
@@ -27,7 +27,7 @@ class XHamsterIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- mrss_url = 'http://xhamster.com/movies/%s/.html' % video_id
+ mrss_url = 'http://' + mobj.group('url')
webpage = self._download_webpage(mrss_url, video_id)
mobj = re.search(r'\'srv\': \'(?P[^\']*)\',\s*\'file\': \'(?P[^\']+)\',', webpage)
From fad84d50fe124df1c620c9bc95bdc4c9e5053e6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 14 Sep 2013 11:10:01 +0200
Subject: [PATCH 082/215] [googleplus] Fix upload date extraction
---
youtube_dl/extractor/googleplus.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py
index f1cd88983..8895ad289 100644
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dl/extractor/googleplus.py
@@ -40,7 +40,8 @@ class GooglePlusIE(InfoExtractor):
self.report_extraction(video_id)
# Extract update date
- upload_date = self._html_search_regex('title="Timestamp">(.*?)',
+ upload_date = self._html_search_regex(
+ ['title="Timestamp">(.*?)', r'(.+?)'],
webpage, u'upload date', fatal=False)
if upload_date:
# Convert timestring to a format suitable for filename
From 0b7f31184d6a2d87cf7f568c561ff8d017f07bd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 14 Sep 2013 11:14:40 +0200
Subject: [PATCH 083/215] Now --all-sub is a modifier to --write-sub and
--write-auto-sub (closes #1412)
To keep backwards compatibility, --all-sub sets --write-sub if --write-auto-sub is not given.
---
test/test_dailymotion_subtitles.py | 2 ++
test/test_youtube_subtitles.py | 2 ++
youtube_dl/YoutubeDL.py | 4 ++--
youtube_dl/__init__.py | 5 +++++
youtube_dl/extractor/subtitles.py | 5 ++---
5 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/test/test_dailymotion_subtitles.py b/test/test_dailymotion_subtitles.py
index bcd9f79f6..83c65d57e 100644
--- a/test/test_dailymotion_subtitles.py
+++ b/test/test_dailymotion_subtitles.py
@@ -40,6 +40,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792')
def test_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 5)
@@ -54,6 +55,7 @@ class TestDailymotionSubtitles(unittest.TestCase):
self.assertTrue(len(subtitles.keys()) == 0)
def test_nosubtitles(self):
self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv'
+ self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index 5632871ac..168e6c66c 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -41,6 +41,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
subtitles = self.getSubtitles()
self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d')
def test_youtube_allsubtitles(self):
+ self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles.keys()), 13)
@@ -66,6 +67,7 @@ class TestYoutubeSubtitles(unittest.TestCase):
self.assertTrue(subtitles['it'] is not None)
def test_youtube_nosubtitles(self):
self.url = 'sAjKT8FhjI8'
+ self.DL.params['writesubtitles'] = True
self.DL.params['allsubtitles'] = True
subtitles = self.getSubtitles()
self.assertEqual(len(subtitles), 0)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c2f992b8e..e53a2b8ad 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -74,6 +74,7 @@ class YoutubeDL(object):
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatic subtitles to a file
allsubtitles: Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
subtitleslangs: List of languages of the subtitles to download
@@ -499,8 +500,7 @@ class YoutubeDL(object):
return
subtitles_are_requested = any([self.params.get('writesubtitles', False),
- self.params.get('writeautomaticsub'),
- self.params.get('allsubtitles', False)])
+ self.params.get('writeautomaticsub')])
if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
# subtitles download errors are already managed as troubles in relevant IE
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 696e54f49..0022a4e7a 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -533,6 +533,11 @@ def _real_main(argv=None):
else:
date = DateRange(opts.dateafter, opts.datebefore)
+ # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+ # this was the old behaviour if only --all-sub was given.
+ if opts.allsubtitles and (opts.writeautomaticsub == False):
+ opts.writesubtitles = True
+
if sys.version_info < (3,):
# In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems)
if opts.outtmpl is not None:
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
index 97215f289..90de7de3a 100644
--- a/youtube_dl/extractor/subtitles.py
+++ b/youtube_dl/extractor/subtitles.py
@@ -10,8 +10,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
@property
def _have_to_download_any_subtitles(self):
return any([self._downloader.params.get('writesubtitles', False),
- self._downloader.params.get('writeautomaticsub'),
- self._downloader.params.get('allsubtitles', False)])
+ self._downloader.params.get('writeautomaticsub')])
def _list_available_subtitles(self, video_id, webpage=None):
""" outputs the available subtitles for the video """
@@ -34,7 +33,7 @@ class SubtitlesInfoExtractor(InfoExtractor):
available_subs_list = {}
if self._downloader.params.get('writeautomaticsub', False):
available_subs_list.update(self._get_available_automatic_caption(video_id, video_webpage))
- if self._downloader.params.get('writesubtitles', False) or self._downloader.params.get('allsubtitles', False):
+ if self._downloader.params.get('writesubtitles', False):
available_subs_list.update(self._get_available_subtitles(video_id))
if not available_subs_list: # error, it didn't get the available subtitles
From 19e1d35989970831007b7ca5d988fe0454f08a1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 14 Sep 2013 14:26:42 +0200
Subject: [PATCH 084/215] [mixcloud] Rewrite extractor (fixes #278)
---
youtube_dl/extractor/mixcloud.py | 118 ++++++++++---------------------
youtube_dl/utils.py | 11 ++-
2 files changed, 46 insertions(+), 83 deletions(-)
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
index 8245b5583..a200dcd74 100644
--- a/youtube_dl/extractor/mixcloud.py
+++ b/youtube_dl/extractor/mixcloud.py
@@ -5,34 +5,27 @@ import socket
from .common import InfoExtractor
from ..utils import (
compat_http_client,
- compat_str,
compat_urllib_error,
compat_urllib_request,
-
- ExtractorError,
+ unified_strdate,
)
class MixcloudIE(InfoExtractor):
- _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
_VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
IE_NAME = u'mixcloud'
- def report_download_json(self, file_id):
- """Report JSON download."""
- self.to_screen(u'Downloading json')
-
- def get_urls(self, jsonData, fmt, bitrate='best'):
- """Get urls from 'audio_formats' section in json"""
- try:
- bitrate_list = jsonData[fmt]
- if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
- bitrate = max(bitrate_list) # select highest
-
- url_list = jsonData[fmt][bitrate]
- except TypeError: # we have no bitrate info.
- url_list = jsonData[fmt]
- return url_list
+ _TEST = {
+ u'url': u'http://www.mixcloud.com/dholbach/cryptkeeper/',
+ u'file': u'dholbach-cryptkeeper.mp3',
+ u'info_dict': {
+ u'title': u'Cryptkeeper',
+ u'description': u'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+ u'uploader': u'Daniel Holbach',
+ u'uploader_id': u'dholbach',
+ u'upload_date': u'20111115',
+ },
+ }
def check_urls(self, url_list):
"""Returns 1st active url from list"""
@@ -45,71 +38,32 @@ class MixcloudIE(InfoExtractor):
return None
- def _print_formats(self, formats):
- print('Available formats:')
- for fmt in formats.keys():
- for b in formats[fmt]:
- try:
- ext = formats[fmt][b][0]
- print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
- except TypeError: # we have no bitrate info
- ext = formats[fmt][0]
- print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
- break
-
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
- # extract uploader & filename from url
- uploader = mobj.group(1).decode('utf-8')
- file_id = uploader + "-" + mobj.group(2).decode('utf-8')
- # construct API request
- file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
- # retrieve .json file with links to files
- request = compat_urllib_request.Request(file_url)
- try:
- self.report_download_json(file_url)
- jsonData = compat_urllib_request.urlopen(request).read()
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- raise ExtractorError(u'Unable to retrieve file: %s' % compat_str(err))
+ uploader = mobj.group(1)
+ cloudcast_name = mobj.group(2)
+ track_id = '-'.join((uploader, cloudcast_name))
+ api_url = 'http://api.mixcloud.com/%s/%s/' % (uploader, cloudcast_name)
+ webpage = self._download_webpage(url, track_id)
+ json_data = self._download_webpage(api_url, track_id,
+ u'Downloading cloudcast info')
+ info = json.loads(json_data)
- # parse JSON
- json_data = json.loads(jsonData)
- player_url = json_data['player_swf_url']
- formats = dict(json_data['audio_formats'])
+ preview_url = self._search_regex(r'data-preview-url="(.+?)"', webpage, u'preview url')
+ song_url = preview_url.replace('/previews/', '/cloudcasts/originals/')
+ template_url = re.sub(r'(stream\d*)', 'stream%d', song_url)
+ final_song_url = self.check_urls(template_url % i for i in range(30))
- req_format = self._downloader.params.get('format', None)
-
- if self._downloader.params.get('listformats', None):
- self._print_formats(formats)
- return
-
- if req_format is None or req_format == 'best':
- for format_param in formats.keys():
- url_list = self.get_urls(formats, format_param)
- # check urls
- file_url = self.check_urls(url_list)
- if file_url is not None:
- break # got it!
- else:
- if req_format not in formats:
- raise ExtractorError(u'Format is not available')
-
- url_list = self.get_urls(formats, req_format)
- file_url = self.check_urls(url_list)
- format_param = req_format
-
- return [{
- 'id': file_id.decode('utf-8'),
- 'url': file_url.decode('utf-8'),
- 'uploader': uploader.decode('utf-8'),
- 'upload_date': None,
- 'title': json_data['name'],
- 'ext': file_url.split('.')[-1].decode('utf-8'),
- 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
- 'thumbnail': json_data['thumbnail_url'],
- 'description': json_data['description'],
- 'player_url': player_url.decode('utf-8'),
- }]
+ return {
+ 'id': track_id,
+ 'title': info['name'],
+ 'url': final_song_url,
+ 'ext': 'mp3',
+ 'description': info['description'],
+ 'thumbnail': info['pictures'].get('extra_large'),
+ 'uploader': info['user']['name'],
+ 'uploader_id': info['user']['username'],
+ 'upload_date': unified_strdate(info['created_time']),
+ 'view_count': info['play_count'],
+ }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 768c6207d..5558d4737 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -700,7 +700,16 @@ def unified_strdate(date_str):
date_str = date_str.replace(',',' ')
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' (\+|-)[\d]*$', '', date_str)
- format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y', '%Y-%m-%d', '%d/%m/%Y', '%Y/%m/%d %H:%M:%S', '%d.%m.%Y %H:%M']
+ format_expressions = [
+ '%d %B %Y',
+ '%B %d %Y',
+ '%b %d %Y',
+ '%Y-%m-%d',
+ '%d/%m/%Y',
+ '%Y/%m/%d %H:%M:%S',
+ '%d.%m.%Y %H:%M',
+ '%Y-%m-%dT%H:%M:%SZ',
+ ]
for expression in format_expressions:
try:
upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
From 471a5ee908ee765c1ba1ff6a41051bcf71065064 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 14 Sep 2013 14:45:04 +0200
Subject: [PATCH 085/215] Set the ext field for each format
---
youtube_dl/extractor/archiveorg.py | 7 ++++---
youtube_dl/extractor/dreisat.py | 6 +++---
youtube_dl/extractor/trilulilu.py | 4 ++--
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
index 7efd1d823..61ce4469a 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dl/extractor/archiveorg.py
@@ -46,6 +46,8 @@ class ArchiveOrgIE(InfoExtractor):
for fn,fdata in data['files'].items()
if 'Video' in fdata['format']]
formats.sort(key=lambda fdata: fdata['file_size'])
+ for f in formats:
+ f['ext'] = determine_ext(f['url'])
info = {
'_type': 'video',
@@ -61,7 +63,6 @@ class ArchiveOrgIE(InfoExtractor):
info['thumbnail'] = thumbnail
# TODO: Remove when #980 has been merged
- info['url'] = formats[-1]['url']
- info['ext'] = determine_ext(formats[-1]['url'])
+ info.update(formats[-1])
- return info
\ No newline at end of file
+ return info
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 64b465805..765cb1f37 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -54,6 +54,7 @@ class DreiSatIE(InfoExtractor):
'width': int(fe.find('./width').text),
'height': int(fe.find('./height').text),
'url': fe.find('./url').text,
+ 'ext': determine_ext(fe.find('./url').text),
'filesize': int(fe.find('./filesize').text),
'video_bitrate': int(fe.find('./videoBitrate').text),
'3sat_qualityname': fe.find('./quality').text,
@@ -79,7 +80,6 @@ class DreiSatIE(InfoExtractor):
}
# TODO: Remove when #980 has been merged
- info['url'] = formats[-1]['url']
- info['ext'] = determine_ext(formats[-1]['url'])
+ info.update(formats[-1])
- return info
\ No newline at end of file
+ return info
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py
index f278951ba..0bf028f61 100644
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -52,6 +52,7 @@ class TriluliluIE(InfoExtractor):
{
'format': fnode.text,
'url': video_url_template % fnode.text,
+ 'ext': fnode.text.partition('-')[0]
}
for fnode in format_doc.findall('./formats/format')
@@ -67,7 +68,6 @@ class TriluliluIE(InfoExtractor):
}
# TODO: Remove when #980 has been merged
- info['url'] = formats[-1]['url']
- info['ext'] = formats[-1]['format'].partition('-')[0]
+ info.update(formats[-1])
return info
From 92790f4e542fc3d5f4cc02a647a2695d9175d464 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 14 Sep 2013 21:41:49 +0200
Subject: [PATCH 086/215] [soundcloud] Add an extractor for users (closes
#1426)
---
test/test_playlists.py | 10 ++++++-
youtube_dl/extractor/__init__.py | 2 +-
youtube_dl/extractor/soundcloud.py | 45 ++++++++++++++++++++++++++++--
3 files changed, 53 insertions(+), 4 deletions(-)
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 4a2e00b01..d079a4f23 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -8,7 +8,7 @@ import json
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
from youtube_dl.utils import *
from helper import FakeYDL
@@ -42,5 +42,13 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'5124905')
self.assertTrue(len(result['entries']) >= 11)
+ def test_soundcloud_user(self):
+ dl = FakeYDL()
+ ie = SoundcloudUserIE(dl)
+ result = ie.extract('https://soundcloud.com/the-concept-band')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], u'9615865')
+ self.assertTrue(len(result['entries']) >= 12)
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 06f9542d2..19d57c2e9 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -82,7 +82,7 @@ from .sina import SinaIE
from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .sohu import SohuIE
-from .soundcloud import SoundcloudIE, SoundcloudSetIE
+from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
index 5f3a5540d..29cd5617c 100644
--- a/youtube_dl/extractor/soundcloud.py
+++ b/youtube_dl/extractor/soundcloud.py
@@ -1,10 +1,12 @@
import json
import re
+import itertools
from .common import InfoExtractor
from ..utils import (
compat_str,
compat_urlparse,
+ compat_urllib_parse,
ExtractorError,
unified_strdate,
@@ -53,10 +55,11 @@ class SoundcloudIE(InfoExtractor):
def _resolv_url(cls, url):
return 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
- def _extract_info_dict(self, info, full_title=None):
+ def _extract_info_dict(self, info, full_title=None, quiet=False):
video_id = info['id']
name = full_title or video_id
- self.report_extraction(name)
+ if quiet == False:
+ self.report_extraction(name)
thumbnail = info['artwork_url']
if thumbnail is not None:
@@ -198,3 +201,41 @@ class SoundcloudSetIE(SoundcloudIE):
'id': info['id'],
'title': info['title'],
}
+
+
+class SoundcloudUserIE(SoundcloudIE):
+ _VALID_URL = r'https?://(www\.)?soundcloud.com/(?P[^/]+)(/?(tracks/)?)?(\?.*)?$'
+ IE_NAME = u'soundcloud:user'
+
+ # it's in tests/test_playlists.py
+ _TEST = None
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ uploader = mobj.group('user')
+
+ url = 'http://soundcloud.com/%s/' % uploader
+ resolv_url = self._resolv_url(url)
+ user_json = self._download_webpage(resolv_url, uploader,
+ u'Downloading user info')
+ user = json.loads(user_json)
+
+ tracks = []
+ for i in itertools.count():
+ data = compat_urllib_parse.urlencode({'offset': i*50,
+ 'client_id': self._CLIENT_ID,
+ })
+ tracks_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % user['id'] + data
+ response = self._download_webpage(tracks_url, uploader,
+ u'Downloading tracks page %s' % (i+1))
+ new_tracks = json.loads(response)
+ tracks.extend(self._extract_info_dict(track, quiet=True) for track in new_tracks)
+ if len(new_tracks) < 50:
+ break
+
+ return {
+ '_type': 'playlist',
+ 'id': compat_str(user['id']),
+ 'title': user['username'],
+ 'entries': tracks,
+ }
From e69ae5b9e74910541e75eea4c8dfc13066f28f65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sun, 15 Sep 2013 12:14:59 +0200
Subject: [PATCH 087/215] [youtube] support youtube.googleapis.com/v/* urls
(fixes #1425)
---
test/test_all_urls.py | 1 +
youtube_dl/extractor/youtube.py | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 99fc7bd28..ff1c86efe 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -36,6 +36,7 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertFalse(YoutubeIE.suitable(u'https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) #668
self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube'])
self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube'])
+ self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube'])
def test_youtube_channel_matching(self):
assertChannel = lambda url: self.assertMatch(url, ['youtube:channel'])
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index f49665925..e4a2e22bc 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -139,7 +139,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
(
(?:https?://)? # http(s):// (optional)
(?:(?:(?:(?:\w+\.)?youtube(?:-nocookie)?\.com/|
- tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains
+ tube\.majestyc\.net/|
+ youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
(?:(?:v|embed|e)/) # v/ or embed/ or e/
From 5a6fecc3dee35f95f3590a31e51670819db5a1fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sun, 15 Sep 2013 23:30:58 +0200
Subject: [PATCH 088/215] Add an extractor for southparkstudios.com (closes
#1434)
It uses the MTV system
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/southparkstudios.py | 34 ++++++++++++++++++++++++
2 files changed, 35 insertions(+)
create mode 100644 youtube_dl/extractor/southparkstudios.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 19d57c2e9..246f1e8b5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -83,6 +83,7 @@ from .slashdot import SlashdotIE
from .slideshare import SlideshareIE
from .sohu import SohuIE
from .soundcloud import SoundcloudIE, SoundcloudSetIE, SoundcloudUserIE
+from .southparkstudios import SouthParkStudiosIE
from .spiegel import SpiegelIE
from .stanfordoc import StanfordOpenClassroomIE
from .statigram import StatigramIE
diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py
new file mode 100644
index 000000000..a5dc754dd
--- /dev/null
+++ b/youtube_dl/extractor/southparkstudios.py
@@ -0,0 +1,34 @@
+import re
+
+from .mtv import MTVIE, _media_xml_tag
+
+
+class SouthParkStudiosIE(MTVIE):
+ IE_NAME = u'southparkstudios.com'
+ _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P\d+)'
+
+ _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
+
+ _TEST = {
+ u'url': u'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured',
+ u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
+ u'info_dict': {
+ u'title': u'Bat Daded',
+ u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
+ },
+ }
+
+ # Overwrite MTVIE properties we don't want
+ _TESTS = []
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+ return itemdoc.find(search_path).attrib['url']
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ webpage = self._download_webpage(url, video_id)
+ mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
+ webpage, u'mgid')
+ return self._get_videos_info(mgid)
From 22b50ecb2f7f9e0469d281a4c401d4a531c1cc5b Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Mon, 16 Sep 2013 03:32:45 +0200
Subject: [PATCH 089/215] Start of a Windows service
---
devscripts/buildserver.py | 143 ++++++++++++++++++++++++++++++++++----
1 file changed, 128 insertions(+), 15 deletions(-)
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
index 45c875b23..e0c3cc83e 100644
--- a/devscripts/buildserver.py
+++ b/devscripts/buildserver.py
@@ -4,8 +4,10 @@ from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
import argparse
import ctypes
+import functools
import sys
import threading
+import traceback
import os.path
@@ -21,10 +23,40 @@ SERVICE_WIN32_OWN_PROCESS = 0x10
SERVICE_AUTO_START = 0x2
SERVICE_ERROR_NORMAL = 0x1
DELETE = 0x00010000
+SERVICE_STATUS_START_PENDING = 0x00000002
+SERVICE_STATUS_RUNNING = 0x00000004
+SERVICE_ACCEPT_STOP = 0x1
+
+SVCNAME = 'youtubedl_builder'
+
+LPTSTR = ctypes.c_wchar_p
+START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
+
+
+class SERVICE_TABLE_ENTRY(ctypes.Structure):
+ _fields_ = [
+ ('lpServiceName', LPTSTR),
+ ('lpServiceProc', START_CALLBACK)
+ ]
+
+
+HandlerEx = ctypes.WINFUNCTYPE(
+ ctypes.c_int, # return
+ ctypes.c_int, # dwControl
+ ctypes.c_int, # dwEventType
+ ctypes.c_void_p, # lpEventData,
+ ctypes.c_void_p, # lpContext,
+)
+
+
+def _ctypes_array(c_type, py_array):
+ ar = (c_type * len(py_array))()
+ ar[:] = py_array
+ return ar
def win_OpenSCManager():
- res = advapi32.OpenSCManagerA(None, None, SC_MANAGER_ALL_ACCESS)
+ res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
if not res:
raise Exception('Opening service manager failed - '
'are you running this as administrator?')
@@ -34,7 +66,7 @@ def win_OpenSCManager():
def win_install_service(service_name, cmdline):
manager = win_OpenSCManager()
try:
- h = advapi32.CreateServiceA(
+ h = advapi32.CreateServiceW(
manager, service_name, None,
SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
@@ -50,7 +82,7 @@ def win_install_service(service_name, cmdline):
def win_uninstall_service(service_name):
manager = win_OpenSCManager()
try:
- h = advapi32.OpenServiceA(manager, service_name, DELETE)
+ h = advapi32.OpenServiceW(manager, service_name, DELETE)
if not h:
raise OSError('Could not find service %s: %s' % (
service_name, ctypes.FormatError()))
@@ -64,17 +96,90 @@ def win_uninstall_service(service_name):
advapi32.CloseServiceHandle(manager)
-def install_service(bind):
- fn = os.path.normpath(__file__)
- cmdline = '"%s" "%s" -s -b "%s"' % (sys.executable, fn, bind)
- win_install_service('youtubedl_builder', cmdline)
+def win_service_report_event(service_name, msg, is_error=True):
+ with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
+ f.write(msg + '\n')
+
+ event_log = advapi32.RegisterEventSourceW(None, service_name)
+ if not event_log:
+ raise OSError('Could not report event: %s' % ctypes.FormatError())
+
+ try:
+ type_id = 0x0001 if is_error else 0x0004
+ event_id = 0xc0000000 if is_error else 0x40000000
+ lines = _ctypes_array(LPTSTR, [msg])
+
+ if not advapi32.ReportEventW(
+ event_log, type_id, 0, event_id, None, len(lines), 0,
+ lines, None):
+ raise OSError('Event reporting failed: %s' % ctypes.FormatError())
+ finally:
+ advapi32.DeregisterEventSource(event_log)
-def uninstall_service():
- win_uninstall_service('youtubedl_builder')
+def win_service_handler(stop_event, *args):
+ try:
+ raise ValueError('Handler called with args ' + repr(args))
+ TODO
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
-def main(argv):
+def win_service_set_status(handle, status_code):
+ svcStatus = SERVICE_STATUS()
+ svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
+ svcStatus.dwCurrentState = status_code
+ svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
+
+ svcStatus.dwServiceSpecificExitCode = 0
+
+ if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
+ raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
+
+
+def win_service_main(service_name, real_main, argc, argv_raw):
+ try:
+ #args = [argv_raw[i].value for i in range(argc)]
+ stop_event = threading.Event()
+ handler = HandlerEx(functools.partial(stop_event, win_service_handler))
+ h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
+ if not h:
+ raise OSError('Handler registration failed: %s' %
+ ctypes.FormatError())
+
+ TODO
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
+
+
+def win_service_start(service_name, real_main):
+ try:
+ cb = START_CALLBACK(
+ functools.partial(win_service_main, service_name, real_main))
+ dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
+ SERVICE_TABLE_ENTRY(
+ service_name,
+ cb
+ ),
+ SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
+ ])
+
+ if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
+ raise OSError('ctypes start failed: %s' % ctypes.FormatError())
+ except Exception as e:
+ tb = traceback.format_exc()
+ msg = str(e) + '\n' + tb
+ win_service_report_event(service_name, msg, is_error=True)
+ raise
+
+
+def main(args=None):
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--install',
action='store_const', dest='action', const='install',
@@ -83,18 +188,26 @@ def main(argv):
action='store_const', dest='action', const='uninstall',
help='Remove Windows service')
parser.add_argument('-s', '--service',
- action='store_const', dest='action', const='servce',
+ action='store_const', dest='action', const='service',
help='Run as a Windows service')
parser.add_argument('-b', '--bind', metavar='',
action='store', default='localhost:8142',
help='Bind to host:port (default %default)')
- options = parser.parse_args()
+ options = parser.parse_args(args=args)
if options.action == 'install':
- return install_service(options.bind)
+ fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
+ cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
+ win_install_service(SVCNAME, cmdline)
+ return
if options.action == 'uninstall':
- return uninstall_service()
+ win_uninstall_service(SVCNAME)
+ return
+
+ if options.action == 'service':
+ win_service_start(SVCNAME, main)
+ return
host, port_str = options.bind.split(':')
port = int(port_str)
@@ -289,4 +402,4 @@ class BuildHTTPRequestHandler(BaseHTTPRequestHandler):
#==============================================================================
if __name__ == '__main__':
- main(sys.argv[1:])
+ main()
From 6c603ccce334ae244d73c0e82eb5c59e36c3d027 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Mon, 16 Sep 2013 04:12:43 +0200
Subject: [PATCH 090/215] [devscripts/release] temporary workarounds
---
devscripts/release.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/devscripts/release.sh b/devscripts/release.sh
index 62c68a6cf..796468b4b 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -55,8 +55,8 @@ git push origin "$version"
/bin/echo -e "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
make youtube-dl youtube-dl.tar.gz
-wget "http://jeromelaheurte.net:8142/download/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe || \
- wget "http://jeromelaheurte.net:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+read -p "VM running? (y/n) " -n 1
+wget "http://localhost:8142/build/rg3/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
mkdir -p "build/$version"
mv youtube-dl youtube-dl.exe "build/$version"
mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
From f9e66fb99367b5ccac3f0c1c61441ed52d787836 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Mon, 16 Sep 2013 04:12:57 +0200
Subject: [PATCH 091/215] release 2013.09.16
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 3b2505c77..e06b0cd6c 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.12'
+__version__ = '2013.09.16'
From 7459e3a29081dfa4cbbcc795e054e884e1d5e020 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Mon, 16 Sep 2013 06:55:33 +0200
Subject: [PATCH 092/215] Always correct encoding when writing to sys.stderr
(Fixes #1435)
---
youtube_dl/YoutubeDL.py | 6 +-----
youtube_dl/__init__.py | 20 ++++++++++----------
youtube_dl/utils.py | 12 ++++++++++++
3 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index e53a2b8ad..de2b133e0 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -142,14 +142,10 @@ class YoutubeDL(object):
def to_screen(self, message, skip_eol=False):
"""Print message to stdout if not in quiet mode."""
- assert type(message) == type(u'')
if not self.params.get('quiet', False):
terminator = [u'\n', u''][skip_eol]
output = message + terminator
- if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
- output = output.encode(preferredencoding(), 'ignore')
- self._screen_file.write(output)
- self._screen_file.flush()
+ write_string(output, self._screen_file)
def to_stderr(self, message):
"""Print message to stderr."""
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 0022a4e7a..9efd7c3f7 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -354,7 +354,7 @@ def parseOpts(overrideArguments=None):
if overrideArguments is not None:
opts, args = parser.parse_args(overrideArguments)
if opts.verbose:
- sys.stderr.write(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
+ write_string(u'[debug] Override config: ' + repr(overrideArguments) + '\n')
else:
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
@@ -367,9 +367,9 @@ def parseOpts(overrideArguments=None):
argv = systemConf + userConf + commandLineConf
opts, args = parser.parse_args(argv)
if opts.verbose:
- sys.stderr.write(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
- sys.stderr.write(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
- sys.stderr.write(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
+ write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n')
+ write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n')
+ write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n')
return parser, opts, args
@@ -392,7 +392,7 @@ def _real_main(argv=None):
except (IOError, OSError) as err:
if opts.verbose:
traceback.print_exc()
- sys.stderr.write(u'ERROR: unable to open cookie file\n')
+ write_string(u'ERROR: unable to open cookie file\n')
sys.exit(101)
# Set user agent
if opts.user_agent is not None:
@@ -419,7 +419,7 @@ def _real_main(argv=None):
batchurls = [x.strip() for x in batchurls]
batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
if opts.verbose:
- sys.stderr.write(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
+ write_string(u'[debug] Batch file urls: ' + repr(batchurls) + u'\n')
except IOError:
sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
@@ -611,7 +611,7 @@ def _real_main(argv=None):
})
if opts.verbose:
- sys.stderr.write(u'[debug] youtube-dl version ' + __version__ + u'\n')
+ write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
@@ -620,14 +620,14 @@ def _real_main(argv=None):
out, err = sp.communicate()
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- sys.stderr.write(u'[debug] Git HEAD: ' + out + u'\n')
+ write_string(u'[debug] Git HEAD: ' + out + u'\n')
except:
try:
sys.exc_clear()
except:
pass
- sys.stderr.write(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
- sys.stderr.write(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
+ write_string(u'[debug] Python version %s - %s' %(platform.python_version(), platform_name()) + u'\n')
+ write_string(u'[debug] Proxy map: ' + str(proxy_handler.proxies) + u'\n')
ydl.add_default_info_extractors()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 5558d4737..814a9b6be 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -790,6 +790,18 @@ def platform_name():
return res
+def write_string(s, out=None):
+ if out is None:
+ out = sys.stderr
+ assert type(s) == type(u'')
+
+ if ('b' in getattr(out, 'mode', '') or
+ sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ s = s.encode(preferredencoding(), 'ignore')
+ out.write(s)
+ out.flush()
+
+
def bytes_to_intlist(bs):
if not bs:
return []
From 71c107fc5716dc769860ba6d3731184bde9a6902 Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Mon, 16 Sep 2013 14:45:14 +0200
Subject: [PATCH 093/215] Add FKTV extractor
Support for Fernsehkritik-TV (incl. Postecke)
---
youtube_dl/extractor/fktv.py | 58 ++++++++++++++++++++++++++++++++++++
1 file changed, 58 insertions(+)
create mode 100644 youtube_dl/extractor/fktv.py
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
new file mode 100644
index 000000000..239d9df38
--- /dev/null
+++ b/youtube_dl/extractor/fktv.py
@@ -0,0 +1,58 @@
+import re,random
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+)
+
+class FKTVIE(InfoExtractor):
+ """Information Extractor for Fernsehkritik-TV"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P[0-9]+)(?:/.*)?'
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+ episode = int(mobj.group('ep'))
+
+ server = random.randint(2,4)
+ video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
+ videos = []
+ # Download all three parts
+ for i in range(1,4):
+ video_id = '%04d%d' % (episode, i)
+ video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i==1 else '-%d'%i)
+ video_title = 'Fernsehkritik %d.%d' % (episode, i)
+ videos.append({
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': determine_ext(video_url),
+ 'title': video_title,
+ 'thumbnail': video_thumbnail
+ })
+ return videos
+
+class FKTVPosteckeIE(InfoExtractor):
+ """Information Extractor for Fernsehkritik-TV Postecke"""
+ _VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P[0-9]+)(&|$)'
+ _TEST = {
+ u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
+ u'file': u'0120.flv',
+ u'md5': u'262f0adbac80317412f7e57b4808e5c4',
+ u'info_dict': {
+ u"title": u"Postecke 120"
+ }
+ }
+
+ def _real_extract(self,url):
+ mobj = re.match(self._VALID_URL, url)
+ episode = int(mobj.group('ep'))
+
+ server = random.randint(2,4)
+ video_id = '%04d' % episode
+ video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
+ video_title = 'Postecke %d' % episode
+ return[{
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': determine_ext(video_url),
+ 'title': video_title,
+ }]
From 0761d02b0baf20955bd6e4f53568a3bbaa75ab5c Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Mon, 16 Sep 2013 14:46:19 +0200
Subject: [PATCH 094/215] Add FKTV extractor
---
youtube_dl/extractor/__init__.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 06f9542d2..25a8e3cf5 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -28,6 +28,10 @@ from .eighttracks import EightTracksIE
from .escapist import EscapistIE
from .exfm import ExfmIE
from .facebook import FacebookIE
+from .fktv import (
+ FKTVIE,
+ FKTVPosteckeIE,
+)
from .flickr import FlickrIE
from .francetv import (
PluzzIE,
From c4ece785647e58afb4f7b72f492eaf8e714bceba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 16 Sep 2013 19:34:10 +0200
Subject: [PATCH 095/215] [ooyala] add support for more types of video urls,
like m3u8 manifests.
---
youtube_dl/extractor/ooyala.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index b734722d0..01b3637c9 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -35,7 +35,9 @@ class OoyalaIE(InfoExtractor):
mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
player, u'mobile player url')
mobile_player = self._download_webpage(mobile_url, embedCode)
- videos_info = self._search_regex(r'eval\("\((\[{.*?stream_redirect.*?}\])\)"\);', mobile_player, u'info').replace('\\"','"')
+ videos_info = self._search_regex(
+ r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
+ mobile_player, u'info').replace('\\"','"')
videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, u'more info').replace('\\"','"')
videos_info = json.loads(videos_info)
videos_more_info =json.loads(videos_more_info)
From 4b6462fc1e4306e4a1a5b3613b2cef5b09cc9abe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 16 Sep 2013 19:39:39 +0200
Subject: [PATCH 096/215] Add an extractor for Bloomberg (closes #1436)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/bloomberg.py | 27 +++++++++++++++++++++++++++
2 files changed, 28 insertions(+)
create mode 100644 youtube_dl/extractor/bloomberg.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 246f1e8b5..7973a81d0 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -6,6 +6,7 @@ from .arte import ArteTvIE
from .auengine import AUEngineIE
from .bandcamp import BandcampIE
from .bliptv import BlipTVIE, BlipTVUserIE
+from .bloomberg import BloombergIE
from .breakcom import BreakIE
from .brightcove import BrightcoveIE
from .c56 import C56IE
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py
new file mode 100644
index 000000000..3666a780b
--- /dev/null
+++ b/youtube_dl/extractor/bloomberg.py
@@ -0,0 +1,27 @@
+import re
+
+from .common import InfoExtractor
+
+
+class BloombergIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?).html'
+
+ _TEST = {
+ u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
+ u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4',
+ u'info_dict': {
+ u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies',
+ u'description': u'md5:abc86e5236f9f0e4866c59ad36736686',
+ },
+ u'params': {
+ # Requires ffmpeg (m3u8 manifest)
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ webpage = self._download_webpage(url, name)
+ ooyala_url = self._og_search_video_url(webpage)
+ return self.url_result(ooyala_url, ie='Ooyala')
From 4dc0ff3ecf2118a0bac128cb8e006e151222e23b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 16 Sep 2013 20:16:52 +0200
Subject: [PATCH 097/215] [ooyala] prefer ipad url
It has better quality with m3u8 manifests
---
youtube_dl/extractor/ooyala.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index 01b3637c9..d189a9852 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -22,7 +22,7 @@ class OoyalaIE(InfoExtractor):
return {'id': info['embedCode'],
'ext': 'mp4',
'title': unescapeHTML(info['title']),
- 'url': info['url'],
+ 'url': info.get('ipad_url') or info['url'],
'description': unescapeHTML(more_info['description']),
'thumbnail': more_info['promo'],
}
From e8f8e800978c8845a706ebd3ab31bc1b98a51461 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 16 Sep 2013 20:58:36 +0200
Subject: [PATCH 098/215] Add an extractor for vice.com (closes #1051)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/ooyala.py | 4 ++++
youtube_dl/extractor/vice.py | 38 ++++++++++++++++++++++++++++++++
3 files changed, 43 insertions(+)
create mode 100644 youtube_dl/extractor/vice.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 7973a81d0..761575062 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -104,6 +104,7 @@ from .vbox7 import Vbox7IE
from .veehd import VeeHDIE
from .veoh import VeohIE
from .vevo import VevoIE
+from .vice import ViceIE
from .videofyme import VideofyMeIE
from .vimeo import VimeoIE, VimeoChannelIE
from .vine import VineIE
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
index d189a9852..1f7b4d2e7 100644
--- a/youtube_dl/extractor/ooyala.py
+++ b/youtube_dl/extractor/ooyala.py
@@ -18,6 +18,10 @@ class OoyalaIE(InfoExtractor):
},
}
+ @staticmethod
+ def _url_for_embed_code(embed_code):
+ return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+
def _extract_result(self, info, more_info):
return {'id': info['embedCode'],
'ext': 'mp4',
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
new file mode 100644
index 000000000..6b93afa50
--- /dev/null
+++ b/youtube_dl/extractor/vice.py
@@ -0,0 +1,38 @@
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import ExtractorError
+
+
+class ViceIE(InfoExtractor):
+ _VALID_URL = r'http://www.vice.com/.*?/(?P.+)'
+
+ _TEST = {
+ u'url': u'http://www.vice.com/Fringes/cowboy-capitalists-part-1',
+ u'file': u'43cW1mYzpia9IlestBjVpd23Yu3afAfp.mp4',
+ u'info_dict': {
+ u'title': u'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
+ },
+ u'params': {
+ # Requires ffmpeg (m3u8 manifest)
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ webpage = self._download_webpage(url, name)
+ try:
+ ooyala_url = self._og_search_video_url(webpage)
+ except ExtractorError:
+ try:
+ embed_code = self._search_regex(
+ r'OO.Player.create\(\'ooyalaplayer\', \'(.+?)\'', webpage,
+ u'ooyala embed code')
+ ooyala_url = OoyalaIE._url_for_embed_code(embed_code)
+ except ExtractorError:
+ raise ExtractorError(u'The page doesn\'t contain a video', expected=True)
+ return self.url_result(ooyala_url, ie='Ooyala')
+
From 6ae8ee3f542485b3c790fc09e1136762b1b80c89 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 17 Sep 2013 16:59:12 +0200
Subject: [PATCH 099/215] Update 85 signature (Fixes #1449)
This is the first signature algorithm to have been parsed automatically, although that only works for HTML5 players for now, and is not yet integrated into master.
---
devscripts/youtube_genalgo.py | 4 ++--
youtube_dl/extractor/youtube.py | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index b390c7e2e..66019ee55 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -24,8 +24,8 @@ tests = [
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
"yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
# 85 - vflkuzxcs 2013/09/11
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
- "T>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOvUY.REWQ0987654321mnbqcxzasdfghjklpoiuytr"),
+ ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
+ '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
# 84 - vflg0g8PQ 2013/08/29 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e4a2e22bc..0c963fd20 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -429,7 +429,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len(s) == 86:
return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
elif len(s) == 85:
- return s[40] + s[82:43:-1] + s[22] + s[42:40:-1] + s[83] + s[39:22:-1] + s[0] + s[21:2:-1]
+ return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
elif len(s) == 84:
return s[81:36:-1] + s[0] + s[35:2:-1]
elif len(s) == 83:
From f3f34c5b0f51b4453033ef83981ff3284c050da8 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 17 Sep 2013 17:00:20 +0200
Subject: [PATCH 100/215] release 2013.09.17
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index e06b0cd6c..80ccfbd4f 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.16'
+__version__ = '2013.09.17'
From 4a67aafb7e725c49e7bb3bcc5aea3fb3ae5fb42d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 17 Sep 2013 20:59:55 +0200
Subject: [PATCH 101/215] [youtube] Don't search the flash player version for
videos with age gate activated
---
youtube_dl/extractor/youtube.py | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0c963fd20..f227e2086 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -783,10 +783,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if self._downloader.params.get('verbose'):
s = url_data['s'][0]
if age_gate:
- player_version = self._search_regex(r'ad3-(.+?)\.swf',
- video_info['ad3_module'][0] if 'ad3_module' in video_info else 'NOT FOUND',
- 'flash player', fatal=False)
- player = 'flash player %s' % player_version
+ player = 'flash player'
else:
player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
From 6523223a4c6f8924ac156b3fc2f5519a53b58e4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 17 Sep 2013 21:10:57 +0200
Subject: [PATCH 102/215] [hotnewhiphop] Fix test case title
---
youtube_dl/extractor/hotnewhiphop.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py
index ccca1d7e0..3798118a7 100644
--- a/youtube_dl/extractor/hotnewhiphop.py
+++ b/youtube_dl/extractor/hotnewhiphop.py
@@ -7,11 +7,11 @@ from .common import InfoExtractor
class HotNewHipHopIE(InfoExtractor):
_VALID_URL = r'http://www\.hotnewhiphop.com/.*\.(?P.*)\.html'
_TEST = {
- u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html'",
+ u'url': u"http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html",
u'file': u'1435540.mp3',
u'md5': u'2c2cd2f76ef11a9b3b581e8b232f3d96',
u'info_dict': {
- u"title": u"Freddie Gibbs Songs - Lay It Down"
+ u"title": u"Freddie Gibbs - Lay It Down"
}
}
From 5d13df79a51235392bde81274c90e780041e12b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 17 Sep 2013 22:49:43 +0200
Subject: [PATCH 103/215] [francetv] Remove Pluzz test
Videos expire in 7 days
---
youtube_dl/extractor/francetv.py | 12 +-----------
1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index f2b12c884..b8fe82e47 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -34,17 +34,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = u'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
- _TEST = {
- u'url': u'http://pluzz.francetv.fr/videos/allo_rufo_saison5_,88439064.html',
- u'file': u'88439064.mp4',
- u'info_dict': {
- u'title': u'Allô Rufo',
- u'description': u'md5:d909f1ebdf963814b65772aea250400e',
- },
- u'params': {
- u'skip_download': True,
- },
- }
+ # Can't use tests, videos expire in 7 days
def _real_extract(self, url):
title = re.match(self._VALID_URL, url).group(1)
From 1237c9a3a5ef0abca961f7f2252fde7f9e99db66 Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Tue, 17 Sep 2013 06:24:20 +0200
Subject: [PATCH 104/215] XHamsterIE: Fix support for new HD video url format
and add test (closes PR #1443)
---
youtube_dl/extractor/xhamster.py | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
index fa759d30c..361619694 100644
--- a/youtube_dl/extractor/xhamster.py
+++ b/youtube_dl/extractor/xhamster.py
@@ -11,8 +11,8 @@ from ..utils import (
class XHamsterIE(InfoExtractor):
"""Information Extractor for xHamster"""
- _VALID_URL = r'(?:http://)?(?:www.)?xhamster\.com/movies/(?P[0-9]+)/.*\.html'
- _TEST = {
+ _VALID_URL = r'(?:http://)?(?:www\.)?xhamster\.com/movies/(?P[0-9]+)/(?P.+?)\.html(?:\?.*)?'
+ _TESTS = [{
u'url': u'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
u'file': u'1509445.flv',
u'md5': u'9f48e0e8d58e3076bb236ff412ab62fa',
@@ -21,13 +21,24 @@ class XHamsterIE(InfoExtractor):
u"uploader_id": u"Ruseful2011",
u"title": u"FemaleAgent Shy beauty takes the bait"
}
- }
+ },
+ {
+ u'url': u'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+ u'file': u'2221348.flv',
+ u'md5': u'e767b9475de189320f691f49c679c4c7',
+ u'info_dict': {
+ u"upload_date": u"20130914",
+ u"uploader_id": u"jojo747400",
+ u"title": u"Britney Spears Sexy Booty"
+ }
+ }]
def _real_extract(self,url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
- mrss_url = 'http://xhamster.com/movies/%s/.html?hd' % video_id
+ seo = mobj.group('seo')
+ mrss_url = 'http://xhamster.com/movies/%s/%s.html?hd' % (video_id, seo)
webpage = self._download_webpage(mrss_url, video_id)
mobj = re.search(r'\'srv\': \'(?P[^\']*)\',\s*\'file\': \'(?P[^\']+)\',', webpage)
From 830dd1944a3db8de373fe78ac805302915caf126 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Wed, 18 Sep 2013 13:23:04 +0200
Subject: [PATCH 105/215] Clarify -i help (#1453)
---
youtube_dl/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 9efd7c3f7..e8299130c 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -149,7 +149,7 @@ def parseOpts(overrideArguments=None):
general.add_option('-U', '--update',
action='store_true', dest='update_self', help='update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
general.add_option('-i', '--ignore-errors',
- action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
+ action='store_true', dest='ignoreerrors', help='continue on download errors, for example to to skip unavailable videos in a playlist', default=False)
general.add_option('--dump-user-agent',
action='store_true', dest='dump_user_agent',
help='display the current browser identification', default=False)
From eb03f4dad3ebb0f781e6742b6c1c590506d58e5b Mon Sep 17 00:00:00 2001
From: Ruirize
Date: Wed, 18 Sep 2013 15:54:45 +0100
Subject: [PATCH 106/215] Added Newgrounds support
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/newgrounds.py | 37 ++++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
create mode 100644 youtube_dl/extractor/newgrounds.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 761575062..e1ec38cf2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -68,6 +68,7 @@ from .myvideo import MyVideoIE
from .naver import NaverIE
from .nba import NBAIE
from .nbc import NBCNewsIE
+from .newgrounds import NewgroundsIE
from .ooyala import OoyalaIE
from .orf import ORFIE
from .pbs import PBSIE
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
new file mode 100644
index 000000000..d19145a72
--- /dev/null
+++ b/youtube_dl/extractor/newgrounds.py
@@ -0,0 +1,37 @@
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+class NewgroundsIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P\d+)'
+ _TEST = {
+ u'url': u'http://www.newgrounds.com/audio/listen/549479',
+ u'file': u'549479_B7---BusMode.mp3',
+ u'md5': u'2924d938f60415cd7afbe7ae9042a99e',
+ u'info_dict': {
+ u"title": u"B7 - BusMode",
+ u"uploader" : u"Burn7",
+ }
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ music_id = mobj.group('id')
+ webpage = self._download_webpage(url, music_id)
+
+ title = self._html_search_regex(r',"name":"([^"]+)",', webpage, 'music title', flags=re.DOTALL)
+ uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, 'music uploader', flags=re.DOTALL)
+
+ music_url_json_string = '{"url":"' + self._html_search_regex(r'{"url":"([^"]+)",', webpage, 'music url', flags=re.DOTALL) + '"}'
+ music_url_json = json.loads(music_url_json_string)
+ music_url = music_url_json['url']
+
+ return [{
+ 'id': music_id,
+ 'title': title,
+ 'url': music_url,
+ 'uploader': uploader,
+ 'ext': determine_ext(music_url),
+ }]
From 1ef80b55ddf05d7fe2bcba08c414aa10c524870d Mon Sep 17 00:00:00 2001
From: Ruirize
Date: Wed, 18 Sep 2013 16:23:38 +0100
Subject: [PATCH 107/215] Fixes test failure
Was unaware of --id being passed to test.
---
youtube_dl/extractor/newgrounds.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
index d19145a72..f316b9272 100644
--- a/youtube_dl/extractor/newgrounds.py
+++ b/youtube_dl/extractor/newgrounds.py
@@ -8,7 +8,7 @@ class NewgroundsIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P\d+)'
_TEST = {
u'url': u'http://www.newgrounds.com/audio/listen/549479',
- u'file': u'549479_B7---BusMode.mp3',
+ u'file': u'549479.mp3',
u'md5': u'2924d938f60415cd7afbe7ae9042a99e',
u'info_dict': {
u"title": u"B7 - BusMode",
From a19413c311e1bd2ffef2705212a8719b7126eef9 Mon Sep 17 00:00:00 2001
From: Ruirize
Date: Wed, 18 Sep 2013 17:17:12 +0100
Subject: [PATCH 108/215] Changed file hash.
---
youtube_dl/extractor/newgrounds.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
index f316b9272..e66294ade 100644
--- a/youtube_dl/extractor/newgrounds.py
+++ b/youtube_dl/extractor/newgrounds.py
@@ -9,7 +9,7 @@ class NewgroundsIE(InfoExtractor):
_TEST = {
u'url': u'http://www.newgrounds.com/audio/listen/549479',
u'file': u'549479.mp3',
- u'md5': u'2924d938f60415cd7afbe7ae9042a99e',
+ u'md5': u'fe6033d297591288fa1c1f780386f07a',
u'info_dict': {
u"title": u"B7 - BusMode",
u"uploader" : u"Burn7",
From d0ae9e3a8d807d0466bccc27186c8c2d86215350 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Wed, 18 Sep 2013 22:14:43 +0200
Subject: [PATCH 109/215] [newgrounds] simplify
---
youtube_dl/extractor/newgrounds.py | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py
index e66294ade..2ef80bce0 100644
--- a/youtube_dl/extractor/newgrounds.py
+++ b/youtube_dl/extractor/newgrounds.py
@@ -4,6 +4,7 @@ import re
from .common import InfoExtractor
from ..utils import determine_ext
+
class NewgroundsIE(InfoExtractor):
_VALID_URL = r'(?:https?://)?(?:www\.)?newgrounds\.com/audio/listen/(?P\d+)'
_TEST = {
@@ -12,7 +13,7 @@ class NewgroundsIE(InfoExtractor):
u'md5': u'fe6033d297591288fa1c1f780386f07a',
u'info_dict': {
u"title": u"B7 - BusMode",
- u"uploader" : u"Burn7",
+ u"uploader": u"Burn7",
}
}
@@ -21,17 +22,17 @@ class NewgroundsIE(InfoExtractor):
music_id = mobj.group('id')
webpage = self._download_webpage(url, music_id)
- title = self._html_search_regex(r',"name":"([^"]+)",', webpage, 'music title', flags=re.DOTALL)
- uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, 'music uploader', flags=re.DOTALL)
+ title = self._html_search_regex(r',"name":"([^"]+)",', webpage, u'music title')
+ uploader = self._html_search_regex(r',"artist":"([^"]+)",', webpage, u'music uploader')
- music_url_json_string = '{"url":"' + self._html_search_regex(r'{"url":"([^"]+)",', webpage, 'music url', flags=re.DOTALL) + '"}'
+ music_url_json_string = self._html_search_regex(r'({"url":"[^"]+"),', webpage, u'music url') + '}'
music_url_json = json.loads(music_url_json_string)
music_url = music_url_json['url']
- return [{
+ return {
'id': music_id,
- 'title': title,
+ 'title': title,
'url': music_url,
'uploader': uploader,
'ext': determine_ext(music_url),
- }]
+ }
From 2dad310e2cab1913ed1a8d1072b57b46e7257b1e Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Wed, 18 Sep 2013 22:30:22 +0200
Subject: [PATCH 110/215] Credit @Ruirize for newgrounds
---
youtube_dl/__init__.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index e8299130c..df4feefe7 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -30,6 +30,7 @@ __authors__ = (
'Pierre Rudloff',
'Huarong Huo',
'Ismael Mejía',
+ 'Steffan \'Ruirize\' James',
)
__license__ = 'Public Domain'
From 71c82637e7add9b437bc6dbe03035d6d8aae82e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 18 Sep 2013 23:00:32 +0200
Subject: [PATCH 111/215] [youtube] apply the fix for lists with number of
videos multiple of _MAX_RESULTS to user extraction
Copied from the playlist extractor.
---
youtube_dl/extractor/youtube.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index f227e2086..23a8097c5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1005,6 +1005,9 @@ class YoutubeUserIE(InfoExtractor):
response = json.loads(page)
except ValueError as err:
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
+ if 'entry' not in response['feed']:
+ # Number of videos is a multiple of self._MAX_RESULTS
+ break
# Extract video identifiers
ids_in_page = []
From c5e743f66f5637fe02fe0b5167fab99a06b903e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Wed, 18 Sep 2013 23:32:37 +0200
Subject: [PATCH 112/215] [fktv] support videos split in any number of parts
and some style changes
---
youtube_dl/extractor/fktv.py | 57 ++++++++++++++++++++++++------------
1 file changed, 39 insertions(+), 18 deletions(-)
diff --git a/youtube_dl/extractor/fktv.py b/youtube_dl/extractor/fktv.py
index 239d9df38..9c89362ef 100644
--- a/youtube_dl/extractor/fktv.py
+++ b/youtube_dl/extractor/fktv.py
@@ -1,37 +1,58 @@
-import re,random
+import re
+import random
+import json
from .common import InfoExtractor
from ..utils import (
determine_ext,
+ get_element_by_id,
+ clean_html,
)
+
class FKTVIE(InfoExtractor):
- """Information Extractor for Fernsehkritik-TV"""
+ IE_NAME = u'fernsehkritik.tv'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/folge-(?P[0-9]+)(?:/.*)?'
- def _real_extract(self,url):
+ _TEST = {
+ u'url': u'http://fernsehkritik.tv/folge-1',
+ u'file': u'00011.flv',
+ u'info_dict': {
+ u'title': u'Folge 1 vom 10. April 2007',
+ u'description': u'md5:fb4818139c7cfe6907d4b83412a6864f',
+ },
+ }
+
+ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
episode = int(mobj.group('ep'))
-
- server = random.randint(2,4)
+
+ server = random.randint(2, 4)
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%d.jpg' % episode
+ start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%d/Start' % episode,
+ episode)
+ playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage,
+ u'playlist', flags=re.DOTALL)
+ files = json.loads(re.sub('{[^{}]*?}', '{}', playlist))
+ # TODO: return a single multipart video
videos = []
- # Download all three parts
- for i in range(1,4):
+ for i, _ in enumerate(files, 1):
video_id = '%04d%d' % (episode, i)
- video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i==1 else '-%d'%i)
+ video_url = 'http://dl%d.fernsehkritik.tv/fernsehkritik%d%s.flv' % (server, episode, '' if i == 1 else '-%d' % i)
video_title = 'Fernsehkritik %d.%d' % (episode, i)
videos.append({
- 'id': video_id,
- 'url': video_url,
- 'ext': determine_ext(video_url),
- 'title': video_title,
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': determine_ext(video_url),
+ 'title': clean_html(get_element_by_id('eptitle', start_webpage)),
+ 'description': clean_html(get_element_by_id('contentlist', start_webpage)),
'thumbnail': video_thumbnail
})
return videos
+
class FKTVPosteckeIE(InfoExtractor):
- """Information Extractor for Fernsehkritik-TV Postecke"""
+ IE_NAME = u'fernsehkritik.tv:postecke'
_VALID_URL = r'(?:http://)?(?:www\.)?fernsehkritik.tv/inline-video/postecke.php\?(.*&)?ep=(?P[0-9]+)(&|$)'
_TEST = {
u'url': u'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
@@ -42,17 +63,17 @@ class FKTVPosteckeIE(InfoExtractor):
}
}
- def _real_extract(self,url):
+ def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
episode = int(mobj.group('ep'))
-
- server = random.randint(2,4)
+
+ server = random.randint(2, 4)
video_id = '%04d' % episode
video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
video_title = 'Postecke %d' % episode
- return[{
+ return {
'id': video_id,
'url': video_url,
'ext': determine_ext(video_url),
'title': video_title,
- }]
+ }
From bc4b9008981096184739666941e73c8d09623502 Mon Sep 17 00:00:00 2001
From: patrickslin
Date: Thu, 19 Sep 2013 21:49:06 -0700
Subject: [PATCH 113/215] Unable to decrypt signature length 93 (fixes #1461)
---
youtube_dl/extractor/youtube.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 23a8097c5..e5f536e6f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -416,7 +416,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def _decrypt_signature(self, s):
"""Turn the encrypted s field into a working signature"""
- if len(s) == 92:
+ if len(s) == 93:
+ return s[86:29:-1] + s[88] + s[28:5:-1]
+ elif len(s) == 92:
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
elif len(s) == 90:
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
From 63037593c0cc3d5da4065368736d74fd594cb1fc Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 20 Sep 2013 10:24:48 +0200
Subject: [PATCH 114/215] release 2013.09.20
---
README.md | 3 ++-
youtube_dl/version.py | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 400e6cd48..f54945acc 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,8 @@ which means you can modify it, redistribute it or use it however you like.
-U, --update update this program to latest version. Make sure
that you have sufficient permissions (run with
sudo if needed)
- -i, --ignore-errors continue on download errors
+ -i, --ignore-errors continue on download errors, for example to to
+ skip unavailable videos in a playlist
--dump-user-agent display the current browser identification
--user-agent UA specify a custom user agent
--referer REF specify a custom referer, use if the video access
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 80ccfbd4f..a79664521 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.17'
+__version__ = '2013.09.20'
From 1a810f0d4e63ba702e49b7404c3f5f74ef716759 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 20 Sep 2013 13:05:34 +0200
Subject: [PATCH 115/215] [funnyordie] Fix video url extraction
---
youtube_dl/extractor/funnyordie.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py
index f3d86a711..2ccdb7073 100644
--- a/youtube_dl/extractor/funnyordie.py
+++ b/youtube_dl/extractor/funnyordie.py
@@ -21,7 +21,8 @@ class FunnyOrDieIE(InfoExtractor):
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- video_url = self._search_regex(r'type="video/mp4" src="(.*?)"',
+ video_url = self._search_regex(
+ [r'type="video/mp4" src="(.*?)"', r'src="([^>]*?)" type=\'video/mp4\''],
webpage, u'video URL', flags=re.DOTALL)
info = {
From c40c6aaaaa80db619459be3bd7f93853da70be0d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 20 Sep 2013 13:26:03 +0200
Subject: [PATCH 116/215] Catch socket.error before IOError
Since Python 2.6 it's a child class.
---
youtube_dl/YoutubeDL.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index de2b133e0..d999099fe 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -544,11 +544,11 @@ class YoutubeDL(object):
else:
try:
success = self.fd._do_download(filename, info_dict)
- except (OSError, IOError) as err:
- raise UnavailableVideoError(err)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error(u'unable to download video data: %s' % str(err))
return
+ except (OSError, IOError) as err:
+ raise UnavailableVideoError(err)
except (ContentTooShortError, ) as err:
self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
return
From 38d025b3f0f6f349c36a4531f3b36d7e7553f417 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 20 Sep 2013 14:43:16 +0200
Subject: [PATCH 117/215] [youtube] add algo for length 91
---
devscripts/youtube_genalgo.py | 3 +++
youtube_dl/extractor/youtube.py | 2 ++
2 files changed, 5 insertions(+)
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index d4546758d..f91e8855d 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -12,6 +12,9 @@ tests = [
# 92 - vflQw-fB4 2013/07/17
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
"mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
+ # 91 - vfl79wBKW 2013/07/20 (sporadic)
+ ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~",
+ "/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543.1mnbvcxzasdfghjklpoiu"),
# 90
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
"mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e5f536e6f..47d5cb7ff 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -420,6 +420,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return s[86:29:-1] + s[88] + s[28:5:-1]
elif len(s) == 92:
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
+ elif len(s) == 91:
+ return s[84:27:-1] + s[86] + s[26:5:-1]
elif len(s) == 90:
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
elif len(s) == 89:
From 3d60bb96e138ce8221f35b7f9d1e1b28f235083e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 20 Sep 2013 16:55:50 +0200
Subject: [PATCH 118/215] Add an extractor for ebaumsworld.com (closes #1462)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/ebaumsworld.py | 37 +++++++++++++++++++++++++++++
2 files changed, 38 insertions(+)
create mode 100644 youtube_dl/extractor/ebaumsworld.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 726c9fa15..c6a55f194 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .defense import DefenseGouvFrIE
+from .ebaumsworld import EbaumsWorldIE
from .ehow import EHowIE
from .eighttracks import EightTracksIE
from .escapist import EscapistIE
diff --git a/youtube_dl/extractor/ebaumsworld.py b/youtube_dl/extractor/ebaumsworld.py
new file mode 100644
index 000000000..f02c6998b
--- /dev/null
+++ b/youtube_dl/extractor/ebaumsworld.py
@@ -0,0 +1,37 @@
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class EbaumsWorldIE(InfoExtractor):
+ _VALID_URL = r'https?://www\.ebaumsworld\.com/video/watch/(?P\d+)'
+
+ _TEST = {
+ u'url': u'http://www.ebaumsworld.com/video/watch/83367677/',
+ u'file': u'83367677.mp4',
+ u'info_dict': {
+ u'title': u'A Giant Python Opens The Door',
+ u'description': u'This is how nightmares start...',
+ u'uploader': u'jihadpizza',
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ config_xml = self._download_webpage(
+ 'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
+ config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
+ video_url = config.find('file').text
+
+ return {
+ 'id': video_id,
+ 'title': config.find('title').text,
+ 'url': video_url,
+ 'ext': determine_ext(video_url),
+ 'description': config.find('description').text,
+ 'thumbnail': config.find('image').text,
+ 'uploader': config.find('username').text,
+ }
From 58f289d013fb3d225488b43deb8216eee9154857 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 20 Sep 2013 22:59:14 +0200
Subject: [PATCH 119/215] release 2013.09.20.1
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index a79664521..88d70b47a 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.20'
+__version__ = '2013.09.20.1'
From 0fd49457f5257dbe317c69314ee57a6c485d41a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 21 Sep 2013 10:51:25 +0200
Subject: [PATCH 120/215] [southparkstudios] Fix mgid extraction
---
youtube_dl/extractor/southparkstudios.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py
index a5dc754dd..25f799a27 100644
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@@ -14,7 +14,7 @@ class SouthParkStudiosIE(MTVIE):
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
u'info_dict': {
u'title': u'Bat Daded',
- u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
+ u'description': u'Randy finally gets the chance to fight Bat Dad and gets the boys disqualified from the season championships.',
},
}
@@ -29,6 +29,6 @@ class SouthParkStudiosIE(MTVIE):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
+ mgid = self._search_regex(r'data-mgid="(mgid:.*?)"',
webpage, u'mgid')
return self._get_videos_info(mgid)
From 69b227a9bc75a75e9156f05d08c3c69337be64ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 21 Sep 2013 10:58:43 +0200
Subject: [PATCH 121/215] [southparkstudios] add support for
http://www.southparkstudios.com/full-episodes/* urls (closes #1469)
---
youtube_dl/extractor/southparkstudios.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py
index 25f799a27..1a611d3bb 100644
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@@ -5,7 +5,7 @@ from .mtv import MTVIE, _media_xml_tag
class SouthParkStudiosIE(MTVIE):
IE_NAME = u'southparkstudios.com'
- _VALID_URL = r'https?://www\.southparkstudios\.com/clips/(?P\d+)'
+ _VALID_URL = r'https?://www\.southparkstudios\.com/(clips|full-episodes)/(?P.+?)(\?|#|$)'
_FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
@@ -23,7 +23,11 @@ class SouthParkStudiosIE(MTVIE):
def _get_thumbnail_url(self, uri, itemdoc):
search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
- return itemdoc.find(search_path).attrib['url']
+ thumb_node = itemdoc.find(search_path)
+ if thumb_node is None:
+ return None
+ else:
+ return thumb_node.attrib['url']
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
From b61067fa4f6c3bd69452b2530ccdf277e0e23e8b Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 21 Sep 2013 11:10:22 +0200
Subject: [PATCH 122/215] Abort if extractaudio is given without a variable
extension (#1470)
---
youtube_dl/__init__.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index df4feefe7..1ed30aae3 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -551,6 +551,10 @@ def _real_main(argv=None):
or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(title)s-%(id)s.%(ext)s')
+ if '%(ext)s' not in outtmpl and opts.extractaudio:
+ parser.error(u'Cannot download a video and extract audio into the same'
+ u' file! Use "%%(ext)s" instead of %r' %
+ determine_ext(outtmpl, u''))
# YoutubeDL
ydl = YoutubeDL({
From 34308b30d6c2b05819e362deab94ce590c325e67 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 21 Sep 2013 11:48:07 +0200
Subject: [PATCH 123/215] Warn if no locale is set (#1474)
---
youtube_dl/YoutubeDL.py | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index d999099fe..fa24ebe0d 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -104,6 +104,17 @@ class YoutubeDL(object):
self._download_retcode = 0
self._num_downloads = 0
self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+
+ if (sys.version_info >= (3,) and sys.platform != 'win32' and
+ sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+ and not params['restrictfilenames']):
+ # On Python 3, the Unicode filesystem API will throw errors (#1474)
+ self.report_warning(
+ u'Assuming --restrict-filenames isnce file system encoding '
+ u'cannot encode all charactes. '
+ u'Set the LC_ALL environment variable to fix this.')
+ params['restrictfilenames'] = True
+
self.params = params
self.fd = FileDownloader(self, self.params)
From 3a1d48d6de0159807ff57b2cec6766cbfd400f00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 21 Sep 2013 12:15:54 +0200
Subject: [PATCH 124/215] [dailymotion] Raise ExtractorError if the dailymotion
response reports an error
---
youtube_dl/extractor/dailymotion.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 360113f9c..ce7057a26 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -63,6 +63,9 @@ class DailymotionIE(SubtitlesInfoExtractor):
info = self._search_regex(r'var info = ({.*?}),$', embed_page,
'video info', flags=re.MULTILINE)
info = json.loads(info)
+ if info.get('error') is not None:
+ msg = 'Couldn\'t get video, Dailymotion says: %s' % info['error']['title']
+ raise ExtractorError(msg, expected=True)
# TODO: support choosing qualities
From 39baacc49f323adc639d502d38a016ebd63acd75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 21 Sep 2013 12:45:53 +0200
Subject: [PATCH 125/215] [dailymotion] Add an extractor for users (closes
#1476)
---
test/test_playlists.py | 16 ++++++++++-
youtube_dl/extractor/__init__.py | 6 +++-
youtube_dl/extractor/dailymotion.py | 44 +++++++++++++++++++++++------
3 files changed, 55 insertions(+), 11 deletions(-)
diff --git a/test/test_playlists.py b/test/test_playlists.py
index d079a4f23..e22054d69 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -1,4 +1,5 @@
#!/usr/bin/env python
+# encoding: utf-8
import sys
import unittest
@@ -8,7 +9,13 @@ import json
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE, UstreamChannelIE, SoundcloudUserIE
+from youtube_dl.extractor import (
+ DailymotionPlaylistIE,
+ DailymotionUserIE,
+ VimeoChannelIE,
+ UstreamChannelIE,
+ SoundcloudUserIE,
+)
from youtube_dl.utils import *
from helper import FakeYDL
@@ -25,6 +32,13 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'SPORT')
self.assertTrue(len(result['entries']) > 20)
+ def test_dailymotion_user(self):
+ dl = FakeYDL()
+ ie = DailymotionUserIE(dl)
+ result = ie.extract('http://www.dailymotion.com/user/generation-quoi/')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['title'], u'Génération Quoi')
+ self.assertTrue(len(result['entries']) >= 26)
def test_vimeo_channel(self):
dl = FakeYDL()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index c6a55f194..949f59a44 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -18,7 +18,11 @@ from .comedycentral import ComedyCentralIE
from .condenast import CondeNastIE
from .criterion import CriterionIE
from .cspan import CSpanIE
-from .dailymotion import DailymotionIE, DailymotionPlaylistIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionPlaylistIE,
+ DailymotionUserIE,
+)
from .daum import DaumIE
from .depositfiles import DepositFilesIE
from .dotsub import DotsubIE
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index ce7057a26..64b89aae8 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -114,28 +114,54 @@ class DailymotionIE(SubtitlesInfoExtractor):
class DailymotionPlaylistIE(InfoExtractor):
+ IE_NAME = u'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/'
_MORE_PAGES_INDICATOR = r''
+ _PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
+ def _extract_entries(self, id):
video_ids = []
-
for pagenum in itertools.count(1):
- webpage = self._download_webpage('https://www.dailymotion.com/playlist/%s/%s' % (playlist_id, pagenum),
- playlist_id, u'Downloading page %s' % pagenum)
+ webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum),
+ id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
video_ids.extend(re.findall(r'data-id="(.+?)" data-ext-id', playlist_el))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
-
- entries = [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
+ return [self.url_result('http://www.dailymotion.com/video/%s' % video_id, 'Dailymotion')
for video_id in video_ids]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+ webpage = self._download_webpage(url, playlist_id)
+
return {'_type': 'playlist',
'id': playlist_id,
'title': get_element_by_id(u'playlist_name', webpage),
- 'entries': entries,
+ 'entries': self._extract_entries(playlist_id),
}
+
+
+class DailymotionUserIE(DailymotionPlaylistIE):
+ IE_NAME = u'dailymotion:user'
+ _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/user/(?P[^/]+)'
+ _MORE_PAGES_INDICATOR = r''
+ _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user = mobj.group('user')
+ webpage = self._download_webpage(url, user)
+ full_user = self._html_search_regex(
+ r'(.*?)' % re.escape(user),
+ webpage, u'user', flags=re.DOTALL)
+
+ return {
+ '_type': 'playlist',
+ 'id': user,
+ 'title': full_user,
+ 'entries': self._extract_entries(user),
+ }
From b00ca882a4c1069de1ec2d04ffd50905c0f8b97f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 21 Sep 2013 13:50:52 +0200
Subject: [PATCH 126/215] [livestream] Fix events extraction (fixes #1467)
---
test/test_playlists.py | 10 ++++++++++
youtube_dl/extractor/livestream.py | 14 +++++++++++---
2 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/test/test_playlists.py b/test/test_playlists.py
index e22054d69..c33511333 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -15,6 +15,7 @@ from youtube_dl.extractor import (
VimeoChannelIE,
UstreamChannelIE,
SoundcloudUserIE,
+ LivestreamIE,
)
from youtube_dl.utils import *
@@ -32,6 +33,7 @@ class TestPlaylists(unittest.TestCase):
self.assertIsPlaylist(result)
self.assertEqual(result['title'], u'SPORT')
self.assertTrue(len(result['entries']) > 20)
+
def test_dailymotion_user(self):
dl = FakeYDL()
ie = DailymotionUserIE(dl)
@@ -64,5 +66,13 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], u'9615865')
self.assertTrue(len(result['entries']) >= 12)
+ def test_livestream_event(self):
+ dl = FakeYDL()
+ ie = LivestreamIE(dl)
+ result = ie.extract('http://new.livestream.com/tedx/cityenglish')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['title'], u'TEDCity2.0 (English)')
+ self.assertTrue(len(result['entries']) >= 4)
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 309921078..d04da98c8 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -2,7 +2,12 @@ import re
import json
from .common import InfoExtractor
-from ..utils import compat_urllib_parse_urlparse, compat_urlparse
+from ..utils import (
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+ get_meta_content,
+ ExtractorError,
+)
class LivestreamIE(InfoExtractor):
@@ -35,8 +40,11 @@ class LivestreamIE(InfoExtractor):
if video_id is None:
# This is an event page:
- api_url = self._search_regex(r'event_design_eventId: \'(.+?)\'',
- webpage, 'api url')
+ player = get_meta_content('twitter:player', webpage)
+ if player is None:
+ raise ExtractorError('Couldn\'t extract event api url')
+ api_url = player.replace('/player', '')
+ api_url = re.sub(r'^(https?://)(new\.)', r'\1api.\2', api_url)
info = json.loads(self._download_webpage(api_url, event_name,
u'Downloading event info'))
videos = [self._extract_video_info(video_data['data'])
From e0df6211cc9364f62406b2907fa830847324db53 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 21 Sep 2013 14:19:30 +0200
Subject: [PATCH 127/215] Restore accidentally deleted commits
That's what happens if you let Windows machines write :(
---
.gitignore | 1 +
test/test_youtube_signature.py | 80 +++++
youtube_dl/extractor/youtube.py | 611 ++++++++++++++++++++++++++++++--
youtube_dl/utils.py | 6 +
4 files changed, 677 insertions(+), 21 deletions(-)
create mode 100644 test/test_youtube_signature.py
diff --git a/.gitignore b/.gitignore
index 61cb6bc3c..24fdb3626 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,3 +24,4 @@ updates_key.pem
*.flv
*.mp4
*.part
+test/testdata
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
new file mode 100644
index 000000000..2c06caef4
--- /dev/null
+++ b/test/test_youtube_signature.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+import io
+import re
+import string
+import sys
+import unittest
+
+# Allow direct execution
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.extractor import YoutubeIE
+from youtube_dl.utils import compat_str, compat_urlretrieve
+
+_TESTS = [
+ (
+ u'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js',
+ u'js',
+ 86,
+ u'>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321',
+ ),
+ (
+ u'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js',
+ u'js',
+ 85,
+ u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@',
+ ),
+ (
+ u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
+ u'swf',
+ 82,
+ u'23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?#$%&\'()*+,-./:;<=>"'
+ ),
+]
+
+
+class TestSignature(unittest.TestCase):
+ def setUp(self):
+ TEST_DIR = os.path.dirname(os.path.abspath(__file__))
+ self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
+ if not os.path.exists(self.TESTDATA_DIR):
+ os.mkdir(self.TESTDATA_DIR)
+
+
+def make_testfunc(url, stype, sig_length, expected_sig):
+ basename = url.rpartition('/')[2]
+ m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
+ assert m, '%r should follow URL format' % basename
+ test_id = m.group(1)
+
+ def test_func(self):
+ fn = os.path.join(self.TESTDATA_DIR, basename)
+
+ if not os.path.exists(fn):
+ compat_urlretrieve(url, fn)
+
+ ie = YoutubeIE()
+ if stype == 'js':
+ with io.open(fn, encoding='utf-8') as testf:
+ jscode = testf.read()
+ func = ie._parse_sig_js(jscode)
+ else:
+ assert stype == 'swf'
+ with open(fn, 'rb') as testf:
+ swfcode = testf.read()
+ func = ie._parse_sig_swf(swfcode)
+ src_sig = compat_str(string.printable[:sig_length])
+ got_sig = func(src_sig)
+ self.assertEqual(got_sig, expected_sig)
+
+ test_func.__name__ = str('test_signature_' + stype + '_' + test_id)
+ setattr(TestSignature, test_func.__name__, test_func)
+
+for test_spec in _TESTS:
+ make_testfunc(*test_spec)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 47d5cb7ff..456d3cb0f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1,11 +1,16 @@
# coding: utf-8
+import collections
+import itertools
+import io
import json
import netrc
import re
import socket
-import itertools
-import xml.etree.ElementTree
+import string
+import struct
+import traceback
+import zlib
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
@@ -393,6 +398,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if YoutubePlaylistIE.suitable(url): return False
return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
+ def __init__(self, *args, **kwargs):
+ super(YoutubeIE, self).__init__(*args, **kwargs)
+ self._jsplayer_cache = {}
+
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
self.to_screen(u'%s: Downloading video webpage' % video_id)
@@ -413,15 +422,565 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected')
- def _decrypt_signature(self, s):
+ def _extract_signature_function(self, video_id, player_url):
+ id_m = re.match(r'.*-(?P[^.]+)\.(?P[^.]+)$', player_url)
+ player_type = id_m.group('ext')
+ player_id = id_m.group('id')
+
+ if player_type == 'js':
+ code = self._download_webpage(
+ player_url, video_id,
+ note=u'Downloading %s player %s' % (player_type, jsplayer_id),
+ errnote=u'Download of %s failed' % player_url)
+ return self._parse_sig_js(code)
+ elif player_tpye == 'swf':
+ urlh = self._request_webpage(
+ player_url, video_id,
+ note=u'Downloading %s player %s' % (player_type, jsplayer_id),
+ errnote=u'Download of %s failed' % player_url)
+ code = urlh.read()
+ return self._parse_sig_swf(code)
+ else:
+ assert False, 'Invalid player type %r' % player_type
+
+ def _parse_sig_js(self, jscode):
+ funcname = self._search_regex(
+ r'signature=([a-zA-Z]+)', jscode,
+ u'Initial JS player signature function name')
+
+ functions = {}
+
+ def argidx(varname):
+ return string.lowercase.index(varname)
+
+ def interpret_statement(stmt, local_vars, allow_recursion=20):
+ if allow_recursion < 0:
+ raise ExctractorError(u'Recursion limit reached')
+
+ if stmt.startswith(u'var '):
+ stmt = stmt[len(u'var '):]
+ ass_m = re.match(r'^(?P[a-z]+)(?:\[(?P[^\]]+)\])?' +
+ r'=(?P.*)$', stmt)
+ if ass_m:
+ if ass_m.groupdict().get('index'):
+ def assign(val):
+ lvar = local_vars[ass_m.group('out')]
+ idx = interpret_expression(ass_m.group('index'),
+ local_vars, allow_recursion)
+ assert isinstance(idx, int)
+ lvar[idx] = val
+ return val
+ expr = ass_m.group('expr')
+ else:
+ def assign(val):
+ local_vars[ass_m.group('out')] = val
+ return val
+ expr = ass_m.group('expr')
+ elif stmt.startswith(u'return '):
+ assign = lambda v: v
+ expr = stmt[len(u'return '):]
+ else:
+ raise ExtractorError(
+ u'Cannot determine left side of statement in %r' % stmt)
+
+ v = interpret_expression(expr, local_vars, allow_recursion)
+ return assign(v)
+
+ def interpret_expression(expr, local_vars, allow_recursion):
+ if expr.isdigit():
+ return int(expr)
+
+ if expr.isalpha():
+ return local_vars[expr]
+
+ m = re.match(r'^(?P[a-z]+)\.(?P.*)$', expr)
+ if m:
+ member = m.group('member')
+ val = local_vars[m.group('in')]
+ if member == 'split("")':
+ return list(val)
+ if member == 'join("")':
+ return u''.join(val)
+ if member == 'length':
+ return len(val)
+ if member == 'reverse()':
+ return val[::-1]
+ slice_m = re.match(r'slice\((?P.*)\)', member)
+ if slice_m:
+ idx = interpret_expression(
+ slice_m.group('idx'), local_vars, allow_recursion-1)
+ return val[idx:]
+
+ m = re.match(
+ r'^(?P[a-z]+)\[(?P.+)\]$', expr)
+ if m:
+ val = local_vars[m.group('in')]
+ idx = interpret_expression(m.group('idx'), local_vars,
+ allow_recursion-1)
+ return val[idx]
+
+ m = re.match(r'^(?P.+?)(?P[%])(?P.+?)$', expr)
+ if m:
+ a = interpret_expression(m.group('a'),
+ local_vars, allow_recursion)
+ b = interpret_expression(m.group('b'),
+ local_vars, allow_recursion)
+ return a % b
+
+ m = re.match(
+ r'^(?P[a-zA-Z]+)\((?P[a-z0-9,]+)\)$', expr)
+ if m:
+ fname = m.group('func')
+ if fname not in functions:
+ functions[fname] = extract_function(fname)
+ argvals = [int(v) if v.isdigit() else local_vars[v]
+ for v in m.group('args').split(',')]
+ return functions[fname](argvals)
+ raise ExtractorError(u'Unsupported JS expression %r' % expr)
+
+ def extract_function(funcname):
+ func_m = re.search(
+ r'function ' + re.escape(funcname) +
+ r'\((?P[a-z,]+)\){(?P[^}]+)}',
+ jscode)
+ argnames = func_m.group('args').split(',')
+
+ def resf(args):
+ local_vars = dict(zip(argnames, args))
+ for stmt in func_m.group('code').split(';'):
+ res = interpret_statement(stmt, local_vars)
+ return res
+ return resf
+
+ initial_function = extract_function(funcname)
+ return lambda s: initial_function([s])
+
+ def _parse_sig_swf(self, file_contents):
+ if file_contents[1:3] != b'WS':
+ raise ExtractorError(
+ u'Not an SWF file; header is %r' % file_contents[:3])
+ if file_contents[:1] == b'C':
+ content = zlib.decompress(file_contents[8:])
+ else:
+ raise NotImplementedError(u'Unsupported compression format %r' %
+ file_contents[:1])
+
+ def extract_tags(content):
+ pos = 0
+ while pos < len(content):
+ header16 = struct.unpack('> 6
+ tag_len = header16 & 0x3f
+ if tag_len == 0x3f:
+ tag_len = struct.unpack('> 4
+ methods = {}
+ if kind in [0x00, 0x06]: # Slot or Const
+ _, pos = u30(pos=pos) # Slot id
+ type_name_idx, pos = u30(pos=pos)
+ vindex, pos = u30(pos=pos)
+ if vindex != 0:
+ _, pos = read_byte(pos=pos) # vkind
+ elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
+ _, pos = u30(pos=pos) # disp_id
+ method_idx, pos = u30(pos=pos)
+ methods[multinames[trait_name_idx]] = method_idx
+ elif kind == 0x04: # Class
+ _, pos = u30(pos=pos) # slot_id
+ _, pos = u30(pos=pos) # classi
+ elif kind == 0x05: # Function
+ _, pos = u30(pos=pos) # slot_id
+ function_idx, pos = u30(pos=pos)
+ methods[function_idx] = multinames[trait_name_idx]
+ else:
+ raise ExtractorError(u'Unsupported trait kind %d' % kind)
+
+ if attrs & 0x4 != 0: # Metadata present
+ metadata_count, pos = u30(pos=pos)
+ for _c3 in range(metadata_count):
+ _, pos = u30(pos=pos)
+
+ return (methods, pos)
+
+ # Classes
+ TARGET_CLASSNAME = u'SignatureDecipher'
+ searched_idx = multinames.index(TARGET_CLASSNAME)
+ searched_class_id = None
+ class_count, p = u30()
+ for class_id in range(class_count):
+ name_idx, p = u30()
+ if name_idx == searched_idx:
+ # We found the class we're looking for!
+ searched_class_id = class_id
+ _, p = u30() # super_name idx
+ flags, p = read_byte()
+ if flags & 0x08 != 0: # Protected namespace is present
+ protected_ns_idx, p = u30()
+ intrf_count, p = u30()
+ for _c2 in range(intrf_count):
+ _, p = u30()
+ _, p = u30() # iinit
+ trait_count, p = u30()
+ for _c2 in range(trait_count):
+ _, p = parse_traits_info()
+
+ if searched_class_id is None:
+ raise ExtractorError(u'Target class %r not found' %
+ TARGET_CLASSNAME)
+
+ method_names = {}
+ method_idxs = {}
+ for class_id in range(class_count):
+ _, p = u30() # cinit
+ trait_count, p = u30()
+ for _c2 in range(trait_count):
+ trait_methods, p = parse_traits_info()
+ if class_id == searched_class_id:
+ method_names.update(trait_methods.items())
+ method_idxs.update(dict(
+ (idx, name)
+ for name, idx in trait_methods.items()))
+
+ # Scripts
+ script_count, p = u30()
+ for _c in range(script_count):
+ _, p = u30() # init
+ trait_count, p = u30()
+ for _c2 in range(trait_count):
+ _, p = parse_traits_info()
+
+ # Method bodies
+ method_body_count, p = u30()
+ Method = collections.namedtuple('Method', ['code', 'local_count'])
+ methods = {}
+ for _c in range(method_body_count):
+ method_idx, p = u30()
+ max_stack, p = u30()
+ local_count, p = u30()
+ init_scope_depth, p = u30()
+ max_scope_depth, p = u30()
+ code_length, p = u30()
+ if method_idx in method_idxs:
+ m = Method(code_tag[p:p+code_length], local_count)
+ methods[method_idxs[method_idx]] = m
+ p += code_length
+ exception_count, p = u30()
+ for _c2 in range(exception_count):
+ _, p = u30() # from
+ _, p = u30() # to
+ _, p = u30() # target
+ _, p = u30() # exc_type
+ _, p = u30() # var_name
+ trait_count, p = u30()
+ for _c2 in range(trait_count):
+ _, p = parse_traits_info()
+
+ assert p == len(code_tag)
+ assert len(methods) == len(method_idxs)
+
+ method_pyfunctions = {}
+
+ def extract_function(func_name):
+ if func_name in method_pyfunctions:
+ return method_pyfunctions[func_name]
+ if func_name not in methods:
+ raise ExtractorError(u'Cannot find function %r' % func_name)
+ m = methods[func_name]
+
+ def resfunc(args):
+ print('Entering function %s(%r)' % (func_name, args))
+ registers = ['(this)'] + list(args) + [None] * m.local_count
+ stack = []
+ coder = io.BytesIO(m.code)
+ while True:
+ opcode = struct.unpack('!B', coder.read(1))[0]
+ if opcode == 208: # getlocal_0
+ stack.append(registers[0])
+ elif opcode == 209: # getlocal_1
+ stack.append(registers[1])
+ elif opcode == 210: # getlocal_2
+ stack.append(registers[2])
+ elif opcode == 36: # pushbyte
+ v = struct.unpack('!B', coder.read(1))[0]
+ stack.append(v)
+ elif opcode == 44: # pushstring
+ idx = u30(coder)
+ stack.append(constant_strings[idx])
+ elif opcode == 48: # pushscope
+ # We don't implement the scope register, so we'll just
+ # ignore the popped value
+ stack.pop()
+ elif opcode == 70: # callproperty
+ index = u30(coder)
+ mname = multinames[index]
+ arg_count = u30(coder)
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+ if mname == u'split':
+ assert len(args) == 1
+ assert isinstance(args[0], compat_str)
+ assert isinstance(obj, compat_str)
+ if args[0] == u'':
+ res = list(obj)
+ else:
+ res = obj.split(args[0])
+ stack.append(res)
+ elif mname in method_pyfunctions:
+ stack.append(method_pyfunctions[mname](args))
+ else:
+ raise NotImplementedError(
+ u'Unsupported property %r on %r'
+ % (mname, obj))
+ elif opcode == 93: # findpropstrict
+ index = u30(coder)
+ mname = multinames[index]
+ res = extract_function(mname)
+ stack.append(res)
+ elif opcode == 97: # setproperty
+ index = u30(coder)
+ value = stack.pop()
+ idx = stack.pop()
+ obj = stack.pop()
+ assert isinstance(obj, list)
+ assert isinstance(idx, int)
+ obj[idx] = value
+ elif opcode == 98: # getlocal
+ index = u30(coder)
+ stack.append(registers[index])
+ elif opcode == 99: # setlocal
+ index = u30(coder)
+ value = stack.pop()
+ registers[index] = value
+ elif opcode == 102: # getproperty
+ index = u30(coder)
+ pname = multinames[index]
+ if pname == u'length':
+ obj = stack.pop()
+ assert isinstance(obj, list)
+ stack.append(len(obj))
+ else: # Assume attribute access
+ idx = stack.pop()
+ assert isinstance(idx, int)
+ obj = stack.pop()
+ assert isinstance(obj, list)
+ stack.append(obj[idx])
+ elif opcode == 128: # coerce
+ _ = u30(coder)
+ elif opcode == 133: # coerce_s
+ assert isinstance(stack[-1], (type(None), compat_str))
+ elif opcode == 164: # modulo
+ value2 = stack.pop()
+ value1 = stack.pop()
+ res = value1 % value2
+ stack.append(res)
+ elif opcode == 214: # setlocal_2
+ registers[2] = stack.pop()
+ elif opcode == 215: # setlocal_3
+ registers[3] = stack.pop()
+ else:
+ raise NotImplementedError(
+ u'Unsupported opcode %d' % opcode)
+
+ method_pyfunctions[func_name] = resfunc
+ return resfunc
+
+ initial_function = extract_function(u'decipher')
+ return lambda s: initial_function([s])
+
+ def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False):
"""Turn the encrypted s field into a working signature"""
- if len(s) == 93:
- return s[86:29:-1] + s[88] + s[28:5:-1]
- elif len(s) == 92:
+ if jsplayer_url is not None:
+ try:
+ if jsplayer_url not in self._jsplayer_cache:
+ self._jsplayer_cache[jsplayer_url] = self._extract_signature_function(
+ video_id, jsplayer_url
+ )
+ return self._jsplayer_cache[jsplayer_url]([s])
+ except Exception as e:
+ tb = traceback.format_exc()
+ self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb)
+
+ self._downloader.report_warning(u'Warning: Falling back to static signature algorithm')
+
+ if age_gate:
+ # The videos with age protection use another player, so the
+ # algorithms can be different.
+ if len(s) == 86:
+ return s[2:63] + s[82] + s[64:82] + s[63]
+
+ if len(s) == 92:
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
- elif len(s) == 91:
- return s[84:27:-1] + s[86] + s[26:5:-1]
elif len(s) == 90:
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
elif len(s) == 89:
@@ -631,7 +1190,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_webpage = video_webpage_bytes.decode('utf-8', 'ignore')
# Attempt to extract SWF player URL
- mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
+ mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
if mobj is not None:
player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
else:
@@ -784,21 +1343,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
- if self._downloader.params.get('verbose'):
- s = url_data['s'][0]
- if age_gate:
- player = 'flash player'
- else:
- player = u'html5 player %s' % self._search_regex(r'html5player-(.+?)\.js', video_webpage,
- 'html5 player', fatal=False)
- parts_sizes = u'.'.join(compat_str(len(part)) for part in s.split('.'))
- self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
- (len(s), parts_sizes, url_data['itag'][0], player))
encrypted_sig = url_data['s'][0]
+ if self._downloader.params.get('verbose'):
+ if age_gate:
+ player_version = self._search_regex(r'-(.+)\.swf$',
+ player_url if player_url else 'NOT FOUND',
+ 'flash player', fatal=False)
+ player_desc = 'flash player %s' % player_version
+ else:
+ player_version = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
+ 'html5 player', fatal=False)
+ player_desc = u'html5 player %s' % player_version
+
+ parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.'))
+ self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
+ (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
+
if age_gate:
- signature = self._decrypt_signature_age_gate(encrypted_sig)
+ jsplayer_url = None
else:
- signature = self._decrypt_signature(encrypted_sig)
+ jsplayer_url_json = self._search_regex(
+ r'"assets":.+?"js":\s*("[^"]+")',
+ video_webpage, u'JS player URL')
+ jsplayer_url = json.loads(jsplayer_url_json)
+
+ signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate)
url += '&signature=' + signature
if 'ratebypass' not in url:
url += '&ratebypass=yes'
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 814a9b6be..201ed255d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -66,6 +66,12 @@ try:
except ImportError: # Python 2
from urllib2 import HTTPError as compat_HTTPError
+try:
+ from urllib.request import urlretrieve as compat_urlretrieve
+except ImportError: # Python 2
+ from urllib import urlretrieve as compat_urlretrieve
+
+
try:
from subprocess import DEVNULL
compat_subprocess_get_DEVNULL = lambda: DEVNULL
From a7177865b19cdf711f15e01541aee9deae97a56c Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 21 Sep 2013 14:48:12 +0200
Subject: [PATCH 128/215] Implement more opcodes
---
youtube_dl/extractor/youtube.py | 45 ++++++++++++++++++++++++++++-----
1 file changed, 38 insertions(+), 7 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 456d3cb0f..b57693ee6 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -863,13 +863,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
coder = io.BytesIO(m.code)
while True:
opcode = struct.unpack('!B', coder.read(1))[0]
- if opcode == 208: # getlocal_0
- stack.append(registers[0])
- elif opcode == 209: # getlocal_1
- stack.append(registers[1])
- elif opcode == 210: # getlocal_2
- stack.append(registers[2])
- elif opcode == 36: # pushbyte
+ if opcode == 36: # pushbyte
v = struct.unpack('!B', coder.read(1))[0]
stack.append(v)
elif opcode == 44: # pushstring
@@ -895,12 +889,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
res = obj.split(args[0])
stack.append(res)
+ elif mname == u'slice':
+ assert len(args) == 1
+ assert isinstance(args[0], int)
+ assert isinstance(obj, list)
+ res = obj[args[0]:]
+ stack.append(res)
+ elif mname == u'join':
+ assert len(args) == 1
+ assert isinstance(args[0], compat_str)
+ assert isinstance(obj, list)
+ res = args[0].join(obj)
+ stack.append(res)
elif mname in method_pyfunctions:
stack.append(method_pyfunctions[mname](args))
else:
raise NotImplementedError(
u'Unsupported property %r on %r'
% (mname, obj))
+ elif opcode == 72: # returnvalue
+ res = stack.pop()
+ return res
+ elif opcode == 79: # callpropvoid
+ index = u30(coder)
+ mname = multinames[index]
+ arg_count = u30(coder)
+ args = list(reversed(
+ [stack.pop() for _ in range(arg_count)]))
+ obj = stack.pop()
+ if mname == u'reverse':
+ assert isinstance(obj, list)
+ obj.reverse()
+ else:
+ raise NotImplementedError(
+ u'Unsupported (void) property %r on %r'
+ % (mname, obj))
elif opcode == 93: # findpropstrict
index = u30(coder)
mname = multinames[index]
@@ -943,6 +966,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
value1 = stack.pop()
res = value1 % value2
stack.append(res)
+ elif opcode == 208: # getlocal_0
+ stack.append(registers[0])
+ elif opcode == 209: # getlocal_1
+ stack.append(registers[1])
+ elif opcode == 210: # getlocal_2
+ stack.append(registers[2])
+ elif opcode == 211: # getlocal_3
+ stack.append(registers[3])
elif opcode == 214: # setlocal_2
registers[2] = stack.pop()
elif opcode == 215: # setlocal_3
From 95dbd2f9907416e86424e4372dbd2593c1699e7d Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 21 Sep 2013 15:10:38 +0200
Subject: [PATCH 129/215] Change test target (Verified with node.js)
---
test/test_youtube_signature.py | 2 +-
youtube_dl/extractor/youtube.py | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 2c06caef4..36533cf1f 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -30,7 +30,7 @@ _TESTS = [
u'https://s.ytimg.com/yts/swfbin/watch_as3-vflg5GhxU.swf',
u'swf',
82,
- u'23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?#$%&\'()*+,-./:;<=>"'
+ u':/.-,+*)=\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBAzyxw>utsrqponmlkjihgfedcba987654321'
),
]
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b57693ee6..45b593a12 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -857,7 +857,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
m = methods[func_name]
def resfunc(args):
- print('Entering function %s(%r)' % (func_name, args))
registers = ['(this)'] + list(args) + [None] * m.local_count
stack = []
coder = io.BytesIO(m.code)
From 8379969834b787708ef5574dc447028c1caf295b Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 21 Sep 2013 15:19:48 +0200
Subject: [PATCH 130/215] Prepare signature function caching
---
youtube_dl/extractor/youtube.py | 57 ++++++++++++++++++++-------------
1 file changed, 35 insertions(+), 22 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 45b593a12..2cd2fdce3 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -400,7 +400,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def __init__(self, *args, **kwargs):
super(YoutubeIE, self).__init__(*args, **kwargs)
- self._jsplayer_cache = {}
+ self._player_cache = {}
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
@@ -423,26 +423,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.to_screen(u'RTMP download detected')
def _extract_signature_function(self, video_id, player_url):
- id_m = re.match(r'.*-(?P[^.]+)\.(?P[^.]+)$', player_url)
+ id_m = re.match(r'.*-(?P[a-zA-Z0-9]+)\.(?P[a-z]+)$',
+ player_url)
player_type = id_m.group('ext')
player_id = id_m.group('id')
+ # TODO read from filesystem cache
+
if player_type == 'js':
code = self._download_webpage(
player_url, video_id,
- note=u'Downloading %s player %s' % (player_type, jsplayer_id),
+ note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url)
- return self._parse_sig_js(code)
+ res = self._parse_sig_js(code)
elif player_tpye == 'swf':
urlh = self._request_webpage(
player_url, video_id,
- note=u'Downloading %s player %s' % (player_type, jsplayer_id),
+ note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url)
code = urlh.read()
- return self._parse_sig_swf(code)
+ res = self._parse_sig_swf(code)
else:
assert False, 'Invalid player type %r' % player_type
+ # TODO write cache
+
+ return res
+
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
r'signature=([a-zA-Z]+)', jscode,
@@ -987,22 +994,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
initial_function = extract_function(u'decipher')
return lambda s: initial_function([s])
- def _decrypt_signature(self, s, video_id, jsplayer_url, age_gate=False):
+ def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
"""Turn the encrypted s field into a working signature"""
- if jsplayer_url is not None:
+ if player_url is not None:
try:
- if jsplayer_url not in self._jsplayer_cache:
- self._jsplayer_cache[jsplayer_url] = self._extract_signature_function(
- video_id, jsplayer_url
+ if player_url not in self._player_cache:
+ func = self._extract_signature_function(
+ video_id, player_url
)
- return self._jsplayer_cache[jsplayer_url]([s])
+ self._player_cache[player_url] = func
+ return self._player_cache[player_url](s)
except Exception as e:
tb = traceback.format_exc()
- self._downloader.report_warning(u'Automatic signature extraction failed: ' + tb)
+ self._downloader.report_warning(
+ u'Automatic signature extraction failed: ' + tb)
- self._downloader.report_warning(u'Warning: Falling back to static signature algorithm')
+ self._downloader.report_warning(
+ u'Warning: Falling back to static signature algorithm')
+ return self._static_decrypt_signature(s)
+ def _static_decrypt_signature(self, s):
if age_gate:
# The videos with age protection use another player, so the
# algorithms can be different.
@@ -1376,12 +1388,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
encrypted_sig = url_data['s'][0]
if self._downloader.params.get('verbose'):
if age_gate:
- player_version = self._search_regex(r'-(.+)\.swf$',
- player_url if player_url else 'NOT FOUND',
+ player_version = self._search_regex(
+ r'-(.+)\.swf$',
+ player_url if player_url else None,
'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
- player_version = self._search_regex(r'html5player-(.+?)\.js', video_webpage,
+ player_version = self._search_regex(
+ r'html5player-(.+?)\.js', video_webpage,
'html5 player', fatal=False)
player_desc = u'html5 player %s' % player_version
@@ -1389,15 +1403,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' %
(len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc))
- if age_gate:
- jsplayer_url = None
- else:
+ if not age_gate:
jsplayer_url_json = self._search_regex(
r'"assets":.+?"js":\s*("[^"]+")',
video_webpage, u'JS player URL')
- jsplayer_url = json.loads(jsplayer_url_json)
+ player_url = json.loads(jsplayer_url_json)
- signature = self._decrypt_signature(encrypted_sig, video_id, jsplayer_url, age_gate)
+ signature = self._decrypt_signature(
+ encrypted_sig, video_id, player_url, age_gate)
url += '&signature=' + signature
if 'ratebypass' not in url:
url += '&ratebypass=yes'
From ba552f542f674d35de21d48978f211b8db3f0ff8 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 21 Sep 2013 15:32:37 +0200
Subject: [PATCH 131/215] Use reader instead of indexing
---
youtube_dl/extractor/youtube.py | 254 +++++++++++++++-----------------
1 file changed, 118 insertions(+), 136 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2cd2fdce3..09bd423f5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -590,99 +590,83 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
for tag_code, tag in extract_tags(content)
if tag_code == 82)
p = code_tag.index(b'\0', 4) + 1
+ code_reader = io.BytesIO(code_tag[p:])
# Parse ABC (AVM2 ByteCode)
- def read_int(data=None, pos=None):
- if hasattr(data, 'read'):
- assert pos is None
-
- res = 0
- shift = 0
- for _ in range(5):
- buf = data.read(1)
- assert len(buf) == 1
- b = struct.unpack('> 4
methods = {}
if kind in [0x00, 0x06]: # Slot or Const
- _, pos = u30(pos=pos) # Slot id
- type_name_idx, pos = u30(pos=pos)
- vindex, pos = u30(pos=pos)
+ _ = u30() # Slot id
+ type_name_idx = u30()
+ vindex = u30()
if vindex != 0:
- _, pos = read_byte(pos=pos) # vkind
+ _ = read_byte() # vkind
elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
- _, pos = u30(pos=pos) # disp_id
- method_idx, pos = u30(pos=pos)
+ _ = u30() # disp_id
+ method_idx = u30()
methods[multinames[trait_name_idx]] = method_idx
elif kind == 0x04: # Class
- _, pos = u30(pos=pos) # slot_id
- _, pos = u30(pos=pos) # classi
+ _ = u30() # slot_id
+ _ = u30() # classi
elif kind == 0x05: # Function
- _, pos = u30(pos=pos) # slot_id
- function_idx, pos = u30(pos=pos)
+ _ = u30() # slot_id
+ function_idx = u30()
methods[function_idx] = multinames[trait_name_idx]
else:
raise ExtractorError(u'Unsupported trait kind %d' % kind)
if attrs & 0x4 != 0: # Metadata present
- metadata_count, pos = u30(pos=pos)
+ metadata_count = u30()
for _c3 in range(metadata_count):
- _, pos = u30(pos=pos)
+ _ = u30()
- return (methods, pos)
+ return methods
# Classes
TARGET_CLASSNAME = u'SignatureDecipher'
searched_idx = multinames.index(TARGET_CLASSNAME)
searched_class_id = None
- class_count, p = u30()
+ class_count = u30()
for class_id in range(class_count):
- name_idx, p = u30()
+ name_idx = u30()
if name_idx == searched_idx:
# We found the class we're looking for!
searched_class_id = class_id
- _, p = u30() # super_name idx
- flags, p = read_byte()
+ _ = u30() # super_name idx
+ flags = read_byte()
if flags & 0x08 != 0: # Protected namespace is present
- protected_ns_idx, p = u30()
- intrf_count, p = u30()
+ protected_ns_idx = u30()
+ intrf_count = u30()
for _c2 in range(intrf_count):
- _, p = u30()
- _, p = u30() # iinit
- trait_count, p = u30()
+ _ = u30()
+ _ = u30() # iinit
+ trait_count = u30()
for _c2 in range(trait_count):
- _, p = parse_traits_info()
+ _ = parse_traits_info()
if searched_class_id is None:
raise ExtractorError(u'Target class %r not found' %
@@ -807,10 +789,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
method_names = {}
method_idxs = {}
for class_id in range(class_count):
- _, p = u30() # cinit
- trait_count, p = u30()
+ _ = u30() # cinit
+ trait_count = u30()
for _c2 in range(trait_count):
- trait_methods, p = parse_traits_info()
+ trait_methods = parse_traits_info()
if class_id == searched_class_id:
method_names.update(trait_methods.items())
method_idxs.update(dict(
@@ -818,40 +800,40 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
for name, idx in trait_methods.items()))
# Scripts
- script_count, p = u30()
+ script_count = u30()
for _c in range(script_count):
- _, p = u30() # init
- trait_count, p = u30()
+ _ = u30() # init
+ trait_count = u30()
for _c2 in range(trait_count):
- _, p = parse_traits_info()
+ _ = parse_traits_info()
# Method bodies
- method_body_count, p = u30()
+ method_body_count = u30()
Method = collections.namedtuple('Method', ['code', 'local_count'])
methods = {}
for _c in range(method_body_count):
- method_idx, p = u30()
- max_stack, p = u30()
- local_count, p = u30()
- init_scope_depth, p = u30()
- max_scope_depth, p = u30()
- code_length, p = u30()
+ method_idx = u30()
+ max_stack = u30()
+ local_count = u30()
+ init_scope_depth = u30()
+ max_scope_depth = u30()
+ code_length = u30()
+ code = read_bytes(code_length)
if method_idx in method_idxs:
- m = Method(code_tag[p:p+code_length], local_count)
+ m = Method(code, local_count)
methods[method_idxs[method_idx]] = m
- p += code_length
- exception_count, p = u30()
+ exception_count = u30()
for _c2 in range(exception_count):
- _, p = u30() # from
- _, p = u30() # to
- _, p = u30() # target
- _, p = u30() # exc_type
- _, p = u30() # var_name
- trait_count, p = u30()
+ _ = u30() # from
+ _ = u30() # to
+ _ = u30() # target
+ _ = u30() # exc_type
+ _ = u30() # var_name
+ trait_count = u30()
for _c2 in range(trait_count):
- _, p = parse_traits_info()
+ _ = parse_traits_info()
- assert p == len(code_tag)
+ assert p + code_reader.tell() == len(code_tag)
assert len(methods) == len(method_idxs)
method_pyfunctions = {}
From 2f2ffea9cad7d30165a0171bf6e662bef2182ab4 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 21 Sep 2013 15:34:29 +0200
Subject: [PATCH 132/215] Clarify a couple of calls
---
youtube_dl/extractor/youtube.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 09bd423f5..5c0ea2e43 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -641,7 +641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return res
# minor_version + major_version
- _ = read_bytes(4)
+ _ = read_bytes(2 + 2)
# Constant pool
int_count = u30()
@@ -994,9 +994,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self._downloader.report_warning(
u'Warning: Falling back to static signature algorithm')
- return self._static_decrypt_signature(s)
+ return self._static_decrypt_signature(
+ s, video_id, player_url, age_gate)
- def _static_decrypt_signature(self, s):
+ def _static_decrypt_signature(self, s, video_id, player_url, age_gate):
if age_gate:
# The videos with age protection use another player, so the
# algorithms can be different.
From 4a2080e4077e9e12c860d82a4d2eebc75c1ea54b Mon Sep 17 00:00:00 2001
From: tewe
Date: Sun, 15 Sep 2013 21:58:49 +0200
Subject: [PATCH 133/215] [youku] better error handling
blocked videos used to cause death by TypeError, now we report what the
server says
---
youtube_dl/extractor/youku.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/youtube_dl/extractor/youku.py b/youtube_dl/extractor/youku.py
index 996d38478..00fa2ccb5 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dl/extractor/youku.py
@@ -66,6 +66,12 @@ class YoukuIE(InfoExtractor):
self.report_extraction(video_id)
try:
config = json.loads(jsondata)
+ error_code = config['data'][0].get('error_code')
+ if error_code:
+ # -8 means blocked outside China.
+ error = config['data'][0].get('error') # Chinese and English, separated by newline.
+ raise ExtractorError(error or u'Server reported error %i' % error_code,
+ expected=True)
video_title = config['data'][0]['title']
seed = config['data'][0]['seed']
@@ -89,6 +95,7 @@ class YoukuIE(InfoExtractor):
fileid = config['data'][0]['streamfileids'][format]
keys = [s['k'] for s in config['data'][0]['segs'][format]]
+ # segs is usually a dictionary, but an empty *list* if an error occurred.
except (UnicodeDecodeError, ValueError, KeyError):
raise ExtractorError(u'Unable to extract info section')
From c4417ddb611e14b81fe56b6b32964c5802faf554 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 00:35:03 +0200
Subject: [PATCH 134/215] [youtube] Add filesystem signature cache
---
youtube_dl/FileDownloader.py | 2 ++
youtube_dl/extractor/youtube.py | 35 ++++++++++++++++++++++++++-------
2 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 0b5a5d77d..1eb71a80e 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -39,6 +39,8 @@ class FileDownloader(object):
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
+ cachedir: Location of the cache files in the filesystem.
+ False to disable filesystem cache.
"""
params = None
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 5c0ea2e43..63f59ae8f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -4,8 +4,10 @@ import collections
import itertools
import io
import json
-import netrc
+import operator
+import os.path
import re
+import shutil
import socket
import string
import struct
@@ -422,13 +424,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
"""Indicate the download will use the RTMP protocol."""
self.to_screen(u'RTMP download detected')
- def _extract_signature_function(self, video_id, player_url):
- id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9]+)\.(?P<ext>[a-z]+)$',
+ def _extract_signature_function(self, video_id, player_url, slen):
+ id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$',
player_url)
player_type = id_m.group('ext')
player_id = id_m.group('id')
- # TODO read from filesystem cache
+ # Read from filesystem cache
+ func_id = '%s_%s_%d' % (player_type, player_id, slen)
+ assert os.path.basename(func_id) == func_id
+ cache_dir = self.downloader.params.get('cachedir',
+ u'~/.youtube-dl/cache')
+
+ if cache_dir is not False:
+ cache_fn = os.path.join(os.path.expanduser(cache_dir),
+ u'youtube-sigfuncs',
+ func_id + '.json')
+ try:
+ with io.open(cache_fn, '', encoding='utf-8') as cachef:
+ cache_spec = json.load(cachef)
+ return lambda s: u''.join(s[i] for i in cache_spec)
+ except OSError:
+ pass # No cache available
if player_type == 'js':
code = self._download_webpage(
@@ -436,7 +453,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
note=u'Downloading %s player %s' % (player_type, player_id),
errnote=u'Download of %s failed' % player_url)
res = self._parse_sig_js(code)
- elif player_tpye == 'swf':
+ elif player_type == 'swf':
urlh = self._request_webpage(
player_url, video_id,
note=u'Downloading %s player %s' % (player_type, player_id),
@@ -446,7 +463,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
assert False, 'Invalid player type %r' % player_type
- # TODO write cache
+ if cache_dir is not False:
+ cache_res = res(map(compat_chr, range(slen)))
+ cache_spec = [ord(c) for c in cache_res]
+ shutil.makedirs(os.path.dirname(cache_fn))
+ write_json_file(cache_spec, cache_fn)
return res
@@ -983,7 +1004,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
try:
if player_url not in self._player_cache:
func = self._extract_signature_function(
- video_id, player_url
+ video_id, player_url, len(s)
)
self._player_cache[player_url] = func
return self._player_cache[player_url](s)
From edf3e38ebd6c5db21585dc7b6384e325e6cfb540 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 10:30:02 +0200
Subject: [PATCH 135/215] [youtube] Improve cache and add an option to print
the extracted signatures
---
youtube_dl/FileDownloader.py | 2 +-
youtube_dl/__init__.py | 6 +++
youtube_dl/extractor/youtube.py | 69 +++++++++++++++++++++++++++------
3 files changed, 65 insertions(+), 12 deletions(-)
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 1eb71a80e..604714134 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -40,7 +40,7 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
cachedir: Location of the cache files in the filesystem.
- False to disable filesystem cache.
+ "NONE" to disable filesystem cache.
"""
params = None
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 1ed30aae3..072f69f2e 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -167,6 +167,7 @@ def parseOpts(overrideArguments=None):
help='Output descriptions of all supported extractors', default=False)
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
+ general.add_option('--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', help='Location in the filesystem where youtube-dl can store downloaded information permanently. NONE to disable filesystem caching, %default by default')
selection.add_option('--playlist-start',
@@ -272,6 +273,10 @@ def parseOpts(overrideArguments=None):
verbosity.add_option('--dump-intermediate-pages',
action='store_true', dest='dump_intermediate_pages', default=False,
help='print downloaded pages to debug problems(very verbose)')
+ verbosity.add_option('--youtube-print-sig-code',
+ action='store_true', dest='youtube_print_sig_code', default=False,
+ help=optparse.SUPPRESS_HELP)
+
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name (default)', default=False)
@@ -613,6 +618,7 @@ def _real_main(argv=None):
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize,
'daterange': date,
+ 'youtube_print_sig_code': opts.youtube_print_sig_code
})
if opts.verbose:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 63f59ae8f..4200f987e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1,13 +1,13 @@
# coding: utf-8
import collections
+import errno
import itertools
import io
import json
import operator
import os.path
import re
-import shutil
import socket
import string
import struct
@@ -17,6 +17,7 @@ import zlib
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
from ..utils import (
+ compat_chr,
compat_http_client,
compat_parse_qs,
compat_urllib_error,
@@ -30,6 +31,7 @@ from ..utils import (
unescapeHTML,
unified_strdate,
orderedSet,
+ write_json_file,
)
class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -433,18 +435,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Read from filesystem cache
func_id = '%s_%s_%d' % (player_type, player_id, slen)
assert os.path.basename(func_id) == func_id
- cache_dir = self.downloader.params.get('cachedir',
- u'~/.youtube-dl/cache')
+ cache_dir = self._downloader.params.get('cachedir',
+ u'~/.youtube-dl/cache')
- if cache_dir is not False:
+ if cache_dir != u'NONE':
cache_fn = os.path.join(os.path.expanduser(cache_dir),
u'youtube-sigfuncs',
func_id + '.json')
try:
- with io.open(cache_fn, '', encoding='utf-8') as cachef:
+ with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
cache_spec = json.load(cachef)
return lambda s: u''.join(s[i] for i in cache_spec)
- except OSError:
+ except IOError:
pass # No cache available
if player_type == 'js':
@@ -464,13 +466,55 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
assert False, 'Invalid player type %r' % player_type
if cache_dir is not False:
- cache_res = res(map(compat_chr, range(slen)))
- cache_spec = [ord(c) for c in cache_res]
- shutil.makedirs(os.path.dirname(cache_fn))
- write_json_file(cache_spec, cache_fn)
+ try:
+ cache_res = res(map(compat_chr, range(slen)))
+ cache_spec = [ord(c) for c in cache_res]
+ try:
+ os.makedirs(os.path.dirname(cache_fn))
+ except OSError as ose:
+ if ose.errno != errno.EEXIST:
+ raise
+ write_json_file(cache_spec, cache_fn)
+ except Exception as e:
+ tb = traceback.format_exc()
+ self._downloader.report_warning(
+ u'Writing cache to %r failed: %s' % (cache_fn, tb))
return res
+ def _print_sig_code(self, func, slen):
+ def gen_sig_code(idxs):
+ def _genslice(start, end, step):
+ starts = u'' if start == 0 else str(start)
+ ends = u':%d' % (end+step)
+ steps = u'' if step == 1 else (':%d' % step)
+ return u's[%s%s%s]' % (starts, ends, steps)
+
+ step = None
+ for i, prev in zip(idxs[1:], idxs[:-1]):
+ if step is not None:
+ if i - prev == step:
+ continue
+ yield _genslice(start, prev, step)
+ step = None
+ continue
+ if i - prev in [-1, 1]:
+ step = i - prev
+ start = prev
+ continue
+ else:
+ yield u's[%d]' % prev
+ if step is None:
+ yield u's[%d]' % i
+ else:
+ yield _genslice(start, i, step)
+
+ cache_res = func(map(compat_chr, range(slen)))
+ cache_spec = [ord(c) for c in cache_res]
+ expr_code = u' + '.join(gen_sig_code(cache_spec))
+ code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
+ self.to_screen(u'Extracted signature:\n' + code)
+
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
r'signature=([a-zA-Z]+)', jscode,
@@ -1007,7 +1051,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_id, player_url, len(s)
)
self._player_cache[player_url] = func
- return self._player_cache[player_url](s)
+ func = self._player_cache[player_url]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, len(s))
+ return func(s)
except Exception as e:
tb = traceback.format_exc()
self._downloader.report_warning(
From 4ba146f35dd797e9d78636cb3cffabb100575240 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 10:31:25 +0200
Subject: [PATCH 136/215] Update static signatures
---
youtube_dl/extractor/youtube.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 4200f987e..8245349b2 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1072,8 +1072,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if len(s) == 86:
return s[2:63] + s[82] + s[64:82] + s[63]
- if len(s) == 92:
+ if len(s) == 93:
+ return s[86:29:-1] + s[88] + s[28:5:-1]
+ elif len(s) == 92:
return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83]
+ elif len(s) == 91:
+ return s[84:27:-1] + s[86] + s[26:5:-1]
elif len(s) == 90:
return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81]
elif len(s) == 89:
From 0ca96d48c7f74e122be70b71bb5fe38f4b143cb0 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 10:37:23 +0200
Subject: [PATCH 137/215] [youtube] Improve source code quality
---
youtube_dl/extractor/youtube.py | 104 ++++++++++++++++----------------
1 file changed, 53 insertions(+), 51 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 8245349b2..a9bfc455f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2,16 +2,16 @@
import collections
import errno
-import itertools
import io
+import itertools
import json
-import operator
import os.path
import re
import socket
import string
import struct
import traceback
+import xml.etree.ElementTree
import zlib
from .common import InfoExtractor, SearchInfoExtractor
@@ -475,7 +475,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if ose.errno != errno.EEXIST:
raise
write_json_file(cache_spec, cache_fn)
- except Exception as e:
+ except Exception:
tb = traceback.format_exc()
self._downloader.report_warning(
u'Writing cache to %r failed: %s' % (cache_fn, tb))
@@ -491,6 +491,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return u's[%s%s%s]' % (starts, ends, steps)
step = None
+ start = '(Never used)' # Squelch pyflakes warnings - start will be
+ # set as soon as step is set
for i, prev in zip(idxs[1:], idxs[:-1]):
if step is not None:
if i - prev == step:
@@ -527,7 +529,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def interpret_statement(stmt, local_vars, allow_recursion=20):
if allow_recursion < 0:
- raise ExctractorError(u'Recursion limit reached')
+ raise ExtractorError(u'Recursion limit reached')
if stmt.startswith(u'var '):
stmt = stmt[len(u'var '):]
@@ -685,7 +687,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
v = - ((v ^ 0xffffffff) + 1)
return v
- def string(reader=None):
+ def read_string(reader=None):
if reader is None:
reader = code_reader
slen = u30(reader)
@@ -706,31 +708,31 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return res
# minor_version + major_version
- _ = read_bytes(2 + 2)
+ read_bytes(2 + 2)
# Constant pool
int_count = u30()
for _c in range(1, int_count):
- _ = s32()
+ s32()
uint_count = u30()
for _c in range(1, uint_count):
- _ = u32()
+ u32()
double_count = u30()
- _ = read_bytes((double_count-1) * 8)
+ read_bytes((double_count-1) * 8)
string_count = u30()
constant_strings = [u'']
for _c in range(1, string_count):
- s = string()
+ s = read_string()
constant_strings.append(s)
namespace_count = u30()
for _c in range(1, namespace_count):
- _ = read_bytes(1) # kind
- _ = u30() # name
+ read_bytes(1) # kind
+ u30() # name
ns_set_count = u30()
for _c in range(1, ns_set_count):
count = u30()
for _c2 in range(count):
- _ = u30()
+ u30()
multiname_count = u30()
MULTINAME_SIZES = {
0x07: 2, # QName
@@ -749,13 +751,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
kind = u30()
assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind
if kind == 0x07:
- namespace_idx = u30()
+ u30() # namespace_idx
name_idx = u30()
multinames.append(constant_strings[name_idx])
else:
multinames.append('[MULTINAME kind: %d]' % kind)
for _c2 in range(MULTINAME_SIZES[kind]):
- _ = u30()
+ u30()
# Methods
method_count = u30()
@@ -765,32 +767,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
method_infos = []
for method_id in range(method_count):
param_count = u30()
- _ = u30() # return type
+ u30() # return type
for _ in range(param_count):
- _ = u30() # param type
- _ = u30() # name index (always 0 for youtube)
+ u30() # param type
+ u30() # name index (always 0 for youtube)
flags = read_byte()
if flags & 0x08 != 0:
# Options present
option_count = u30()
for c in range(option_count):
- _ = u30() # val
- _ = read_bytes(1) # kind
+ u30() # val
+ read_bytes(1) # kind
if flags & 0x80 != 0:
# Param names present
for _ in range(param_count):
- _ = u30() # param name
+ u30() # param name
mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0)
method_infos.append(mi)
# Metadata
metadata_count = u30()
for _c in range(metadata_count):
- _ = u30() # name
+ u30() # name
item_count = u30()
for _c2 in range(item_count):
- _ = u30() # key
- _ = u30() # value
+ u30() # key
+ u30() # value
def parse_traits_info():
trait_name_idx = u30()
@@ -799,20 +801,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
attrs = kind_full >> 4
methods = {}
if kind in [0x00, 0x06]: # Slot or Const
- _ = u30() # Slot id
- type_name_idx = u30()
+ u30() # Slot id
+ u30() # type_name_idx
vindex = u30()
if vindex != 0:
- _ = read_byte() # vkind
+ read_byte() # vkind
elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter
- _ = u30() # disp_id
+ u30() # disp_id
method_idx = u30()
methods[multinames[trait_name_idx]] = method_idx
elif kind == 0x04: # Class
- _ = u30() # slot_id
- _ = u30() # classi
+ u30() # slot_id
+ u30() # classi
elif kind == 0x05: # Function
- _ = u30() # slot_id
+ u30() # slot_id
function_idx = u30()
methods[function_idx] = multinames[trait_name_idx]
else:
@@ -821,7 +823,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if attrs & 0x4 != 0: # Metadata present
metadata_count = u30()
for _c3 in range(metadata_count):
- _ = u30()
+ u30() # metadata index
return methods
@@ -835,17 +837,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if name_idx == searched_idx:
# We found the class we're looking for!
searched_class_id = class_id
- _ = u30() # super_name idx
+ u30() # super_name idx
flags = read_byte()
if flags & 0x08 != 0: # Protected namespace is present
- protected_ns_idx = u30()
+ u30() # protected_ns_idx
intrf_count = u30()
for _c2 in range(intrf_count):
- _ = u30()
- _ = u30() # iinit
+ u30()
+ u30() # iinit
trait_count = u30()
for _c2 in range(trait_count):
- _ = parse_traits_info()
+ parse_traits_info()
if searched_class_id is None:
raise ExtractorError(u'Target class %r not found' %
@@ -854,7 +856,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
method_names = {}
method_idxs = {}
for class_id in range(class_count):
- _ = u30() # cinit
+ u30() # cinit
trait_count = u30()
for _c2 in range(trait_count):
trait_methods = parse_traits_info()
@@ -867,10 +869,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Scripts
script_count = u30()
for _c in range(script_count):
- _ = u30() # init
+ u30() # init
trait_count = u30()
for _c2 in range(trait_count):
- _ = parse_traits_info()
+ parse_traits_info()
# Method bodies
method_body_count = u30()
@@ -878,10 +880,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
methods = {}
for _c in range(method_body_count):
method_idx = u30()
- max_stack = u30()
+ u30() # max_stack
local_count = u30()
- init_scope_depth = u30()
- max_scope_depth = u30()
+ u30() # init_scope_depth
+ u30() # max_scope_depth
code_length = u30()
code = read_bytes(code_length)
if method_idx in method_idxs:
@@ -889,14 +891,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
methods[method_idxs[method_idx]] = m
exception_count = u30()
for _c2 in range(exception_count):
- _ = u30() # from
- _ = u30() # to
- _ = u30() # target
- _ = u30() # exc_type
- _ = u30() # var_name
+ u30() # from
+ u30() # to
+ u30() # target
+ u30() # exc_type
+ u30() # var_name
trait_count = u30()
for _c2 in range(trait_count):
- _ = parse_traits_info()
+ parse_traits_info()
assert p + code_reader.tell() == len(code_tag)
assert len(methods) == len(method_idxs)
@@ -1011,7 +1013,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
assert isinstance(obj, list)
stack.append(obj[idx])
elif opcode == 128: # coerce
- _ = u30(coder)
+ u30(coder)
elif opcode == 133: # coerce_s
assert isinstance(stack[-1], (type(None), compat_str))
elif opcode == 164: # modulo
@@ -1055,7 +1057,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if self._downloader.params.get('youtube_print_sig_code'):
self._print_sig_code(func, len(s))
return func(s)
- except Exception as e:
+ except Exception:
tb = traceback.format_exc()
self._downloader.report_warning(
u'Automatic signature extraction failed: ' + tb)
From f8061589e66f12f6c2ffac3d7bfba2a7ac0294d5 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 10:50:12 +0200
Subject: [PATCH 138/215] [youtube] Actually pass in cachedir option
---
youtube_dl/__init__.py | 3 ++-
youtube_dl/extractor/youtube.py | 7 ++++---
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 072f69f2e..a4769a8ae 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -618,7 +618,8 @@ def _real_main(argv=None):
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize,
'daterange': date,
- 'youtube_print_sig_code': opts.youtube_print_sig_code
+ 'cachedir': opts.cachedir,
+ 'youtube_print_sig_code': opts.youtube_print_sig_code,
})
if opts.verbose:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index a9bfc455f..2dd2db673 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -438,7 +438,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
cache_dir = self._downloader.params.get('cachedir',
u'~/.youtube-dl/cache')
- if cache_dir != u'NONE':
+ cache_enabled = cache_dir != u'NONE'
+ if cache_enabled:
cache_fn = os.path.join(os.path.expanduser(cache_dir),
u'youtube-sigfuncs',
func_id + '.json')
@@ -465,7 +466,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
assert False, 'Invalid player type %r' % player_type
- if cache_dir is not False:
+ if cache_enabled:
try:
cache_res = res(map(compat_chr, range(slen)))
cache_spec = [ord(c) for c in cache_res]
@@ -515,7 +516,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
cache_spec = [ord(c) for c in cache_res]
expr_code = u' + '.join(gen_sig_code(cache_spec))
code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
- self.to_screen(u'Extracted signature:\n' + code)
+ self.to_screen(u'Extracted signature function:\n' + code)
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
From c35f9e72ce842ecd476bee3767527da0e675dd1a Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 11:09:25 +0200
Subject: [PATCH 139/215] Move cachedir doc
---
youtube_dl/FileDownloader.py | 2 --
youtube_dl/YoutubeDL.py | 2 ++
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 604714134..0b5a5d77d 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -39,8 +39,6 @@ class FileDownloader(object):
test: Download only first bytes to test the downloader.
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
- cachedir: Location of the cache files in the filesystem.
- "NONE" to disable filesystem cache.
"""
params = None
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index fa24ebe0d..ead1ccb1c 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -81,6 +81,8 @@ class YoutubeDL(object):
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
+ cachedir: Location of the cache files in the filesystem.
+ "NONE" to disable filesystem cache.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
From 13dc64ce741520ba54ba9fff0ab1a3ac4e5c43a4 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 11:17:21 +0200
Subject: [PATCH 140/215] [youtube] Remove _decrypt_signature_age_gate
---
youtube_dl/extractor/youtube.py | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 2dd2db673..56ad33fdc 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1109,15 +1109,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s)))
- def _decrypt_signature_age_gate(self, s):
- # The videos with age protection use another player, so the algorithms
- # can be different.
- if len(s) == 86:
- return s[2:63] + s[82] + s[64:82] + s[63]
- else:
- # Fallback to the other algortihms
- return self._decrypt_signature(s)
-
def _get_available_subtitles(self, video_id):
try:
sub_list = self._download_webpage(
From 45f4a76dbc268a56c212fe25cd27922541840cfe Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 11:45:29 +0200
Subject: [PATCH 141/215] Work around nosetests nosiness
---
test/test_youtube_signature.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index 36533cf1f..5007d9a16 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -43,7 +43,7 @@ class TestSignature(unittest.TestCase):
os.mkdir(self.TESTDATA_DIR)
-def make_testfunc(url, stype, sig_length, expected_sig):
+def make_tfunc(url, stype, sig_length, expected_sig):
basename = url.rpartition('/')[2]
m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename)
assert m, '%r should follow URL format' % basename
@@ -73,7 +73,7 @@ def make_testfunc(url, stype, sig_length, expected_sig):
setattr(TestSignature, test_func.__name__, test_func)
for test_spec in _TESTS:
- make_testfunc(*test_spec)
+ make_tfunc(*test_spec)
if __name__ == '__main__':
From bdde940e90320e350bd96df621ee7e32641e1eca Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 12:17:42 +0200
Subject: [PATCH 142/215] [youtube] Improve flash player URL handling
---
youtube_dl/extractor/youtube.py | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 56ad33fdc..888907c93 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1437,10 +1437,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
encrypted_sig = url_data['s'][0]
if self._downloader.params.get('verbose'):
if age_gate:
- player_version = self._search_regex(
- r'-(.+)\.swf$',
- player_url if player_url else None,
- 'flash player', fatal=False)
+ if player_url is None:
+ player_version = 'unknown'
+ else:
+ player_version = self._search_regex(
+ r'-(.+)\.swf$', player_url,
+ u'flash player', fatal=False)
player_desc = 'flash player %s' % player_version
else:
player_version = self._search_regex(
From d2d8f895310be7fa302ba7755c60d5948866fcaa Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 12:18:10 +0200
Subject: [PATCH 143/215] Do not warn if fallback is without alternatives
(because we did not get the flash player URL)
---
youtube_dl/extractor/youtube.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 888907c93..780690ed0 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1063,8 +1063,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self._downloader.report_warning(
u'Automatic signature extraction failed: ' + tb)
- self._downloader.report_warning(
- u'Warning: Falling back to static signature algorithm')
+ self._downloader.report_warning(
+ u'Warning: Falling back to static signature algorithm')
return self._static_decrypt_signature(
s, video_id, player_url, age_gate)
From c705320f485cd962827fce464a93993569e3173f Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 22 Sep 2013 12:18:16 +0200
Subject: [PATCH 144/215] Correct test strings
---
youtube_dl/extractor/youtube.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 780690ed0..049da2f91 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -468,7 +468,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
if cache_enabled:
try:
- cache_res = res(map(compat_chr, range(slen)))
+ test_string = u''.join(map(compat_chr, range(slen)))
+ cache_res = res(test_string)
cache_spec = [ord(c) for c in cache_res]
try:
os.makedirs(os.path.dirname(cache_fn))
@@ -512,7 +513,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
else:
yield _genslice(start, i, step)
- cache_res = func(map(compat_chr, range(slen)))
+ test_string = u''.join(map(compat_chr, range(slen)))
+ cache_res = func(test_string)
cache_spec = [ord(c) for c in cache_res]
expr_code = u' + '.join(gen_sig_code(cache_spec))
code = u'if len(s) == %d:\n return %s\n' % (slen, expr_code)
From 4ae720042c3959cae856ce93578a0ba4b5817870 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sun, 22 Sep 2013 23:31:39 +0200
Subject: [PATCH 145/215] Include the eta and the speed in the progress hooks
Useful when listening to the progress hook, for example in a GUI.
---
youtube_dl/FileDownloader.py | 45 ++++++++++++++++++++++++++++--------
1 file changed, 35 insertions(+), 10 deletions(-)
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 0b5a5d77d..706592988 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -77,26 +77,43 @@ class FileDownloader(object):
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
+ return None
+ return float(byte_counter) / float(data_len) * 100.0
+
+ @staticmethod
+ def format_percent(percent):
+ if percent is None:
return '---.-%'
- return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
+ return '%6s' % ('%3.1f%%' % percent)
@staticmethod
def calc_eta(start, now, total, current):
if total is None:
- return '--:--'
+ return None
dif = now - start
if current == 0 or dif < 0.001: # One millisecond
- return '--:--'
+ return None
rate = float(current) / dif
- eta = int((float(total) - float(current)) / rate)
+ return int((float(total) - float(current)) / rate)
+
+ @staticmethod
+ def format_eta(eta):
+ if eta is None:
+ return '--:--'
return FileDownloader.format_seconds(eta)
@staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
+ return None
+ return float(bytes) / dif
+
+ @staticmethod
+ def format_speed(speed):
+ if speed is None:
return '%10s' % '---b/s'
- return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
+ return '%10s' % ('%s/s' % FileDownloader.format_bytes(speed))
@staticmethod
def best_block_size(elapsed_time, bytes):
@@ -205,11 +222,14 @@ class FileDownloader(object):
"""Report destination filename."""
self.to_screen(u'[download] Destination: ' + filename)
- def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
+ def report_progress(self, percent, data_len_str, speed, eta):
"""Report download progress."""
if self.params.get('noprogress', False):
return
clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+ eta_str = self.format_eta(eta)
+ percent_str = self.format_percent(percent)
+ speed_str = self.format_speed(speed)
if self.params.get('progress_with_newline', False):
self.to_screen(u'[download] %s of %s at %s ETA %s' %
(percent_str, data_len_str, speed_str, eta_str))
@@ -524,13 +544,14 @@ class FileDownloader(object):
block_size = self.best_block_size(after - before, len(data_block))
# Progress message
- speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
+ speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
if data_len is None:
self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
+ eta = None
else:
- percent_str = self.calc_percent(byte_counter, data_len)
- eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
- self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+ percent = self.calc_percent(byte_counter, data_len)
+ eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+ self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
'downloaded_bytes': byte_counter,
@@ -538,6 +559,8 @@ class FileDownloader(object):
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
+ 'eta': eta,
+ 'speed': speed,
})
# Apply rate limit
@@ -580,6 +603,8 @@ class FileDownloader(object):
* downloaded_bytes: Bytes on disks
* total_bytes: Total bytes, None if unknown
* tmpfilename: The filename we're currently writing to
+ * eta: The estimated time in seconds, None if unknown
+ * speed: The download speed in bytes/second, None if unknown
Hooks are guaranteed to be called at least once (with status "finished")
if the download is successful.
From dd5d2eb03c3673cff5a27cc34c0271085002583e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sun, 22 Sep 2013 23:39:30 +0200
Subject: [PATCH 146/215] If the file is already downloaded include the size in
the progress hook
---
youtube_dl/FileDownloader.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 706592988..d6673fd3a 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -398,6 +398,7 @@ class FileDownloader(object):
self._hook_progress({
'filename': filename,
'status': 'finished',
+ 'total_bytes': os.path.getsize(encodeFilename(filename)),
})
return True
From 81ec7c7901ddfe9366cf1af010eb31b906dcfce0 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Mon, 23 Sep 2013 11:24:10 +0200
Subject: [PATCH 147/215] [facebook] Allow untitled videos (Fixes #1484)
---
youtube_dl/extractor/facebook.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
index beaa5b4bd..9d1bc0751 100644
--- a/youtube_dl/extractor/facebook.py
+++ b/youtube_dl/extractor/facebook.py
@@ -106,8 +106,8 @@ class FacebookIE(InfoExtractor):
video_duration = int(video_data['video_duration'])
thumbnail = video_data['thumbnail_src']
- video_title = self._html_search_regex('',
- webpage, u'title')
+ video_title = self._html_search_regex(
+ r'', webpage, u'title')
info = {
'id': video_id,
From a825f33030f189a37b1c3517ed1770a8b9e274fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 23 Sep 2013 21:28:33 +0200
Subject: [PATCH 148/215] [francetv] Add an extractor for France2
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/francetv.py | 22 ++++++++++++++++++++++
2 files changed, 23 insertions(+)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 949f59a44..65aacebb3 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -42,6 +42,7 @@ from .flickr import FlickrIE
from .francetv import (
PluzzIE,
FranceTvInfoIE,
+ France2IE,
)
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index b8fe82e47..5e915bc03 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -65,3 +65,25 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
webpage = self._download_webpage(url, page_title)
video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
return self._extract_video(video_id)
+
+
+class France2IE(FranceTVBaseInfoExtractor):
+ IE_NAME = u'france2.fr'
+ _VALID_URL = r'https?://www\.france2\.fr/emissions/.*?/videos/(?P\d+)'
+
+ _TEST = {
+ u'url': u'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
+ u'file': u'75540104.mp4',
+ u'info_dict': {
+ u'title': u'13h15, le samedi...',
+ u'description': u'md5:2e5b58ba7a2d3692b35c792be081a03d',
+ },
+ u'params': {
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ return self._extract_video(video_id)
From 5b333c1ce6287badd89dacdd280a3876a09dcbcb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Mon, 23 Sep 2013 21:41:54 +0200
Subject: [PATCH 149/215] [francetv] Add an extractor for Generation Quoi
(closes #1475)
---
youtube_dl/extractor/__init__.py | 1 +
youtube_dl/extractor/francetv.py | 28 ++++++++++++++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 65aacebb3..d1b7e5f99 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -43,6 +43,7 @@ from .francetv import (
PluzzIE,
FranceTvInfoIE,
France2IE,
+ GenerationQuoiIE
)
from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 5e915bc03..b1530e549 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -1,6 +1,7 @@
# encoding: utf-8
import re
import xml.etree.ElementTree
+import json
from .common import InfoExtractor
from ..utils import (
@@ -87,3 +88,30 @@ class France2IE(FranceTVBaseInfoExtractor):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
return self._extract_video(video_id)
+
+
+class GenerationQuoiIE(InfoExtractor):
+ IE_NAME = u'http://generation-quoi.france2.fr'
+ _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P.*)(\?|$)'
+
+ _TEST = {
+ u'url': u'http://generation-quoi.france2.fr/portrait/garde-a-vous',
+ u'file': u'k7FJX8VBcvvLmX4wA5Q.mp4',
+ u'info_dict': {
+ u'title': u'Génération Quoi - Garde à Vous',
+ u'uploader': u'Génération Quoi',
+ },
+ u'params': {
+ # It uses Dailymotion
+ u'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ name = mobj.group('name')
+ info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name)
+ info_json = self._download_webpage(info_url, name)
+ info = json.loads(info_json)
+ return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],
+ ie='Dailymotion')
From 6f56389b8836301fc64f849e43ebd05043c0a66d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 24 Sep 2013 21:02:00 +0200
Subject: [PATCH 150/215] [youtube] update algos for length 86 and 84 (fixes
#1494)
---
devscripts/youtube_genalgo.py | 8 ++++----
youtube_dl/extractor/youtube.py | 4 ++--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
index f91e8855d..3b90a2fed 100644
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -27,15 +27,15 @@ tests = [
# 87
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
"uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
- # 86 - vfluy6kdb 2013/09/06
+ # 86 - vflHql6Pr 2013/09/24
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
- "yuioplkjhgfdsazxcvbnm12345678q0QWrRTYUIOELKJHGFD-AZXCVBNM!@#$%^&*()_<+={[|};?/>.S"),
+ ";}|[{=+-d)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYT_EWQ0987654321mnbvcxzas/fghjklpoiuytrewq"),
# 85 - vflkuzxcs 2013/09/11
('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
- # 84 - vflg0g8PQ 2013/08/29 (sporadic)
+ # 84 - vflHql6Pr 2013/09/24 (sporadic)
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
- ">?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWq0987654321mnbvcxzasdfghjklpoiuytr"),
+ "}[{=+-_)g*&^%$#@!MNBVCXZASDFGHJKLPOIUYTRE(Q0987654321mnbvcxzasdf?hjklpoiuytrewq"),
# 83
("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 47d5cb7ff..ec1cf8d30 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -431,11 +431,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len(s) == 87:
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
elif len(s) == 86:
- return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
+ return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[:16][::-1]
elif len(s) == 85:
return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
elif len(s) == 84:
- return s[81:36:-1] + s[0] + s[35:2:-1]
+ return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
elif len(s) == 83:
return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
elif len(s) == 82:
From bb0eee71e7b7519321694f3d68875bbd71affeb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 24 Sep 2013 21:04:13 +0200
Subject: [PATCH 151/215] [youtube] Update one of the test's description
---
youtube_dl/extractor/youtube.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index ec1cf8d30..606ed21c9 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -352,7 +352,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"info_dict": {
u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
- u"description": u"md5:3e2666e0a55044490499ea45fe9037b7",
+ u"description": u"md5:bdac09887d209a4ed54b8f76b2bdaa8b",
u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop"
}
From c3c88a2664595fd62898e44f8fc93c84e6d3c5a4 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 24 Sep 2013 21:04:43 +0200
Subject: [PATCH 152/215] Allow opts.cachedir == None to disable cache
---
youtube_dl/YoutubeDL.py | 2 +-
youtube_dl/__init__.py | 2 +-
youtube_dl/extractor/youtube.py | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ead1ccb1c..a3a351ee6 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -82,7 +82,7 @@ class YoutubeDL(object):
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
cachedir: Location of the cache files in the filesystem.
- "NONE" to disable filesystem cache.
+ None to disable filesystem cache.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index a4769a8ae..ebf4a300f 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -618,7 +618,7 @@ def _real_main(argv=None):
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize,
'daterange': date,
- 'cachedir': opts.cachedir,
+ 'cachedir': opts.cachedir if opts.cachedir != 'NONE' else None,
'youtube_print_sig_code': opts.youtube_print_sig_code,
})
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 049da2f91..a6eefdf4e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -438,7 +438,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
cache_dir = self._downloader.params.get('cachedir',
u'~/.youtube-dl/cache')
- cache_enabled = cache_dir != u'NONE'
+ cache_enabled = cache_dir is not None
if cache_enabled:
cache_fn = os.path.join(os.path.expanduser(cache_dir),
u'youtube-sigfuncs',
From e35e4ddc9a4605a63a06c5bb12055bfceacb50b8 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 24 Sep 2013 21:18:03 +0200
Subject: [PATCH 153/215] Fix output of --youtube-print-sig-code when counting
down to 0
---
youtube_dl/extractor/youtube.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index a6eefdf4e..148b20160 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -488,8 +488,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
def gen_sig_code(idxs):
def _genslice(start, end, step):
starts = u'' if start == 0 else str(start)
- ends = u':%d' % (end+step)
- steps = u'' if step == 1 else (':%d' % step)
+ ends = (u':%d' % (end+step)) if end + step >= 0 else u':'
+ steps = u'' if step == 1 else (u':%d' % step)
return u's[%s%s%s]' % (starts, ends, steps)
step = None
From f2c327fd39d10115573d709f94f20721a80895fb Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 24 Sep 2013 21:20:42 +0200
Subject: [PATCH 154/215] Fix 86 signature (#1494)
---
youtube_dl/extractor/youtube.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 148b20160..e883a2c54 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1092,7 +1092,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len(s) == 87:
return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:]
elif len(s) == 86:
- return s[5:34] + s[0] + s[35:38] + s[3] + s[39:45] + s[38] + s[46:53] + s[73] + s[54:73] + s[85] + s[74:85] + s[53]
+ return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1]
elif len(s) == 85:
return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84]
elif len(s) == 84:
From 7f747732547fedc876bcdcc77ba53a56324d7e87 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 24 Sep 2013 21:26:10 +0200
Subject: [PATCH 155/215] Add option --no-cache-dir
---
youtube_dl/__init__.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index ebf4a300f..46d0fbd64 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -167,7 +167,12 @@ def parseOpts(overrideArguments=None):
help='Output descriptions of all supported extractors', default=False)
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
- general.add_option('--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache', help='Location in the filesystem where youtube-dl can store downloaded information permanently. NONE to disable filesystem caching, %default by default')
+ general.add_option(
+ '--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache',
+ help='Location in the filesystem where youtube-dl can store downloaded information permanently. %default by default')
+ general.add_option(
+ '--no-cache-dir', action='store_const', const=None, dest='cachedir',
+ help='Disable filesystem caching')
selection.add_option('--playlist-start',
@@ -560,7 +565,7 @@ def _real_main(argv=None):
parser.error(u'Cannot download a video and extract audio into the same'
u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u''))
-
+ raise ValueError(repr(opts.cachedir))
# YoutubeDL
ydl = YoutubeDL({
'usenetrc': opts.usenetrc,
@@ -618,7 +623,7 @@ def _real_main(argv=None):
'min_filesize': opts.min_filesize,
'max_filesize': opts.max_filesize,
'daterange': date,
- 'cachedir': opts.cachedir if opts.cachedir != 'NONE' else None,
+ 'cachedir': opts.cachedir,
'youtube_print_sig_code': opts.youtube_print_sig_code,
})
From 2cdeb20135d31ec568f016108d15735bfca33c10 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 24 Sep 2013 21:28:06 +0200
Subject: [PATCH 156/215] release 2013.09.24
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 88d70b47a..e33421216 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.20.1'
+__version__ = '2013.09.24'
From e80d8610645232583b5aec93fcd446fa67152d0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Tue, 24 Sep 2013 21:38:37 +0200
Subject: [PATCH 157/215] Revert "[southparkstudios] Fix mgid extraction"
This reverts commit 0fd49457f5257dbe317c69314ee57a6c485d41a3.
It seems that the redesign was temporary.
---
youtube_dl/extractor/southparkstudios.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py
index 1a611d3bb..b1e96b679 100644
--- a/youtube_dl/extractor/southparkstudios.py
+++ b/youtube_dl/extractor/southparkstudios.py
@@ -14,7 +14,7 @@ class SouthParkStudiosIE(MTVIE):
u'file': u'a7bff6c2-ed00-11e0-aca6-0026b9414f30.mp4',
u'info_dict': {
u'title': u'Bat Daded',
- u'description': u'Randy finally gets the chance to fight Bat Dad and gets the boys disqualified from the season championships.',
+ u'description': u'Randy disqualifies South Park by getting into a fight with Bat Dad.',
},
}
@@ -33,6 +33,6 @@ class SouthParkStudiosIE(MTVIE):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- mgid = self._search_regex(r'data-mgid="(mgid:.*?)"',
+ mgid = self._search_regex(r'swfobject.embedSWF\(".*?(mgid:.*?)"',
webpage, u'mgid')
return self._get_videos_info(mgid)
From 8b25323ae2a6e144bdb7e46f60960a83487a8fda Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 24 Sep 2013 21:40:47 +0200
Subject: [PATCH 158/215] release 2013.09.24.1
---
README.md | 4 ++++
youtube_dl/version.py | 2 +-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index f54945acc..fc8070c37 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,10 @@ which means you can modify it, redistribute it or use it however you like.
--extractor-descriptions Output descriptions of all supported extractors
--proxy URL Use the specified HTTP/HTTPS proxy
--no-check-certificate Suppress HTTPS certificate validation.
+ --cache-dir None Location in the filesystem where youtube-dl can
+ store downloaded information permanently.
+ ~/.youtube-dl/cache by default
+ --no-cache-dir Disable filesystem caching
## Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index e33421216..cd39f658b 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.24'
+__version__ = '2013.09.24.1'
From 29c7a63df864cb0982119cec35677dbe568909c9 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 24 Sep 2013 21:55:25 +0200
Subject: [PATCH 159/215] Remove debugging code
---
youtube_dl/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 46d0fbd64..3851fc0a6 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -565,7 +565,7 @@ def _real_main(argv=None):
parser.error(u'Cannot download a video and extract audio into the same'
u' file! Use "%%(ext)s" instead of %r' %
determine_ext(outtmpl, u''))
- raise ValueError(repr(opts.cachedir))
+
# YoutubeDL
ydl = YoutubeDL({
'usenetrc': opts.usenetrc,
From b98d6a1e19dd8b7a5a45806aa21faad1f33c1515 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Tue, 24 Sep 2013 21:55:34 +0200
Subject: [PATCH 160/215] release 2013.09.24.2
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index cd39f658b..8e6356dab 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.24.1'
+__version__ = '2013.09.24.2'
From 592882aa9f889432b07ad487f1a4228c9ae12818 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 26 Sep 2013 13:53:57 +0200
Subject: [PATCH 161/215] [brightcove] Support videos that only provide flv
versions (fixes #1504)
Moved the test from generic.py to brightcove.py
---
youtube_dl/extractor/brightcove.py | 62 +++++++++++++++++++++++-------
youtube_dl/extractor/generic.py | 11 ------
2 files changed, 49 insertions(+), 24 deletions(-)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 71e3c7883..859baae75 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -1,3 +1,5 @@
+# encoding: utf-8
+
import re
import json
import xml.etree.ElementTree
@@ -7,15 +9,37 @@ from ..utils import (
compat_urllib_parse,
find_xpath_attr,
compat_urlparse,
+
+ ExtractorError,
)
class BrightcoveIE(InfoExtractor):
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
_PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
-
- # There is a test for Brigtcove in GenericIE, that way we test both the download
- # and the detection of videos, and we don't have to find an URL that is always valid
+
+ _TESTS = [
+ {
+ u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/',
+ u'file': u'2371591881001.mp4',
+ u'md5': u'9e80619e0a94663f0bdc849b4566af19',
+ u'note': u'Test Brightcove downloads and detection in GenericIE',
+ u'info_dict': {
+ u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
+ u'uploader': u'8TV',
+ u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
+ }
+ },
+ {
+ u'url': u'http://medianetwork.oracle.com/video/player/1785452137001',
+ u'file': u'1785452137001.flv',
+ u'info_dict': {
+ u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
+ u'description': u'John Rose speaks at the JVM Language Summit, August 1, 2012.',
+ u'uploader': u'Oracle',
+ },
+ },
+ ]
@classmethod
def _build_brighcove_url(cls, object_str):
@@ -72,15 +96,27 @@ class BrightcoveIE(InfoExtractor):
playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
def _extract_video_info(self, video_info):
- renditions = video_info['renditions']
- renditions = sorted(renditions, key=lambda r: r['size'])
- best_format = renditions[-1]
+ info = {
+ 'id': video_info['id'],
+ 'title': video_info['displayName'],
+ 'description': video_info.get('shortDescription'),
+ 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
+ 'uploader': video_info.get('publisherName'),
+ }
- return {'id': video_info['id'],
- 'title': video_info['displayName'],
- 'url': best_format['defaultURL'],
+ renditions = video_info.get('renditions')
+ if renditions:
+ renditions = sorted(renditions, key=lambda r: r['size'])
+ best_format = renditions[-1]
+ info.update({
+ 'url': best_format['defaultURL'],
'ext': 'mp4',
- 'description': video_info.get('shortDescription'),
- 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
- 'uploader': video_info.get('publisherName'),
- }
+ })
+ elif video_info.get('FLVFullLengthURL') is not None:
+ info.update({
+ 'url': video_info['FLVFullLengthURL'],
+ 'ext': 'flv',
+ })
+ else:
+ raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
+ return info
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f92e61fea..764070635 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -29,17 +29,6 @@ class GenericIE(InfoExtractor):
u"title": u"R\u00e9gis plante sa Jeep"
}
},
- {
- u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/',
- u'file': u'2371591881001.mp4',
- u'md5': u'9e80619e0a94663f0bdc849b4566af19',
- u'note': u'Test Brightcove downloads and detection in GenericIE',
- u'info_dict': {
- u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
- u'uploader': u'8TV',
- u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
- }
- },
]
def report_download_webpage(self, video_id):
From 4de1994b6ed61a2aaddeee6452959d645fe5954b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Thu, 26 Sep 2013 18:59:56 +0200
Subject: [PATCH 162/215] [brightcove] Use direct url for the tests
The test_all_urls.py test failed because BrightcoveIE doesn't match them.
---
youtube_dl/extractor/brightcove.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 859baae75..558b3d009 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -20,7 +20,8 @@ class BrightcoveIE(InfoExtractor):
_TESTS = [
{
- u'url': u'http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/',
+ # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
+ u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
u'file': u'2371591881001.mp4',
u'md5': u'9e80619e0a94663f0bdc849b4566af19',
u'note': u'Test Brightcove downloads and detection in GenericIE',
@@ -31,7 +32,8 @@ class BrightcoveIE(InfoExtractor):
}
},
{
- u'url': u'http://medianetwork.oracle.com/video/player/1785452137001',
+ # From http://medianetwork.oracle.com/video/player/1785452137001
+ u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
u'file': u'1785452137001.flv',
u'info_dict': {
u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
From ce65fb6c76e4496a35cd597bbc735e0351d82853 Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Fri, 27 Sep 2013 05:50:16 +0200
Subject: [PATCH 163/215] [RTLnowIE] Add support for http://rtlnitronow.de
---
youtube_dl/extractor/rtlnow.py | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
index 7bb236c2b..963e0cc8f 100644
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -8,8 +8,8 @@ from ..utils import (
)
class RTLnowIE(InfoExtractor):
- """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
- _VALID_URL = r'(?:http://)?(?P(?Prtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+ """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW and VOX NOW"""
+ _VALID_URL = r'(?:http://)?(?P(?Prtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
_TESTS = [{
u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
u'file': u'90419.flv',
@@ -61,6 +61,19 @@ class RTLnowIE(InfoExtractor):
u'params': {
u'skip_download': True,
},
+ },
+ {
+ u'url': u'http://www.rtlnitronow.de/recht-ordnung/fahrradpolizei-koeln-fischereiaufsicht-ruegen.php?film_id=124311&player=1&season=1',
+ u'file': u'124311.flv',
+ u'info_dict': {
+ u'upload_date': u'20130830',
+ u'title': u'Recht & Ordnung - Fahrradpolizei Köln & Fischereiaufsicht Rügen',
+ u'description': u'Fahrradpolizei Köln & Fischereiaufsicht Rügen',
+ u'thumbnail': u'http://autoimg.static-fra.de/nitronow/338273/1500x1500/image2.jpg'
+ },
+ u'params': {
+ u'skip_download': True,
+ },
}]
def _real_extract(self,url):
From 63efc427cd4a2e0892e02e0519134d760b30814a Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Fri, 27 Sep 2013 06:00:37 +0200
Subject: [PATCH 164/215] [RTLnowIE] Clean video title
The title of some videos has the following format:
Series - Episode | Series online schauen bei ... NOW
---
youtube_dl/extractor/rtlnow.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
index 7bb236c2b..3783aa538 100644
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -79,7 +79,7 @@ class RTLnowIE(InfoExtractor):
msg = clean_html(note_m.group(1))
raise ExtractorError(msg)
- video_title = self._html_search_regex(r'(?P[^<]+)',
+ video_title = self._html_search_regex(r'(?P[^<]+?)( \| [^<]*)?',
webpage, u'title')
playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P[^\']+)\'',
webpage, u'playerdata_url')
From 920de7a27d11a8f162e108c5891de70db738693a Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 27 Sep 2013 06:15:21 +0200
Subject: [PATCH 165/215] [youtube] Fix 83 signature (Closes #1511)
---
youtube_dl/extractor/youtube.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 6beda8f3b..89c41efe5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1067,6 +1067,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
self._downloader.report_warning(
u'Warning: Falling back to static signature algorithm')
+
return self._static_decrypt_signature(
s, video_id, player_url, age_gate)
@@ -1098,7 +1099,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
elif len(s) == 84:
return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1]
elif len(s) == 83:
- return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
+ return s[80:63:-1] + s[0] + s[62:0:-1] + s[63]
elif len(s) == 82:
return s[80:73:-1] + s[81] + s[72:54:-1] + s[2] + s[53:43:-1] + s[0] + s[42:2:-1] + s[43] + s[1] + s[54]
elif len(s) == 81:
From 74bab3f0a4b601a7618f279afbd352bbc51dc3ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 27 Sep 2013 08:08:43 +0200
Subject: [PATCH 166/215] Don't embed subtitles if the list is empty or the
field is not set (fixes #1510)
---
youtube_dl/PostProcessor.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py
index ae56d2082..3ee1d3c58 100644
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -444,8 +444,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
if information['ext'] != u'mp4':
self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
return True, information
- sub_langs = [key for key in information['subtitles']]
+ if not information.get('subtitles'):
+ self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
+ return True, information
+ sub_langs = [key for key in information['subtitles']]
filename = information['filepath']
input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
From 509f398292ff4b9dffecd3a85cd02b4922319b13 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Fri, 27 Sep 2013 13:08:45 +0200
Subject: [PATCH 167/215] Remove youtube_genalgo (#1515)
With the automatic signature extraction, this script has become superfluous now
---
devscripts/youtube_genalgo.py | 116 ----------------------------------
1 file changed, 116 deletions(-)
delete mode 100644 devscripts/youtube_genalgo.py
diff --git a/devscripts/youtube_genalgo.py b/devscripts/youtube_genalgo.py
deleted file mode 100644
index 3b90a2fed..000000000
--- a/devscripts/youtube_genalgo.py
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-# Generate youtube signature algorithm from test cases
-
-import sys
-
-tests = [
- # 93 - vfl79wBKW 2013/07/20
- (u"qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"€",
- u".>/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ098765'321mnbvcxzasdfghjklpoiu"),
- # 92 - vflQw-fB4 2013/07/17
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~\"",
- "mrtyuioplkjhgfdsazxcvbnq1234567890QWERTY}IOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]\"|:;"),
- # 91 - vfl79wBKW 2013/07/20 (sporadic)
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`~",
- "/?;:|}][{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543.1mnbvcxzasdfghjklpoiu"),
- # 90
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'`",
- "mrtyuioplkjhgfdsazxcvbne1234567890QWER[YUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={`]}|"),
- # 89
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<'",
- "/?;:|}<[{=+-_)(*&^%$#@!MqBVCXZASDFGHJKLPOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuyt"),
- # 88 - vflapUV9V 2013/08/28
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[]}|:;?/>.<",
- "ioplkjhgfdsazxcvbnm12<4567890QWERTYUIOZLKJHGFDSAeXCVBNM!@#$%^&*()_-+={[]}|:;?/>.3"),
- # 87
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$^&*()_-+={[]}|:;?/>.<",
- "uioplkjhgfdsazxcvbnm1t34567890QWE2TYUIOPLKJHGFDSAZXCVeNM!@#$^&*()_-+={[]}|:;?/>.<"),
- # 86 - vflHql6Pr 2013/09/24
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
- ";}|[{=+-d)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYT_EWQ0987654321mnbvcxzas/fghjklpoiuytrewq"),
- # 85 - vflkuzxcs 2013/09/11
- ('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[',
- '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@'),
- # 84 - vflHql6Pr 2013/09/24 (sporadic)
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
- "}[{=+-_)g*&^%$#@!MNBVCXZASDFGHJKLPOIUYTRE(Q0987654321mnbvcxzasdf?hjklpoiuytrewq"),
- # 83
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
- ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
- # 82 - vflGNjMhJ 2013/09/12
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.<",
- ".>/?;}[<=+-(*&^%$#@!MNBVCXeASDFGHKLPOqUYTREWQ0987654321mnbvcxzasdfghjklpoiuytrIwZ"),
- # 81 - vflLC8JvQ 2013/07/25
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>.",
- "C>/?;}[{=+-(*&^%$#@!MNBVYXZASDFGHKLPOIU.TREWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
- # 80 - vflZK4ZYR 2013/08/23 (sporadic)
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/>",
- "wertyuioplkjhgfdsaqxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&z(-+={[};?/>"),
- # 79 - vflLC8JvQ 2013/07/25 (sporadic)
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKHGFDSAZXCVBNM!@#$%^&*(-+={[};?/",
- "Z?;}[{=+-(*&^%$#@!MNBVCXRASDFGHKLPOIUYT/EWQ0q87659321mnbvcxzasdfghjkl4oiuytrewp"),
-]
-
-tests_age_gate = [
- # 86 - vflqinMWD
- ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[|};?/>.<",
- "ertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!/#$%^&*()_-+={[|};?@"),
-]
-
-def find_matching(wrong, right):
- idxs = [wrong.index(c) for c in right]
- return compress(idxs)
- return ('s[%d]' % i for i in idxs)
-
-def compress(idxs):
- def _genslice(start, end, step):
- starts = '' if start == 0 else str(start)
- ends = ':%d' % (end+step)
- steps = '' if step == 1 else (':%d' % step)
- return 's[%s%s%s]' % (starts, ends, steps)
-
- step = None
- for i, prev in zip(idxs[1:], idxs[:-1]):
- if step is not None:
- if i - prev == step:
- continue
- yield _genslice(start, prev, step)
- step = None
- continue
- if i - prev in [-1, 1]:
- step = i - prev
- start = prev
- continue
- else:
- yield 's[%d]' % prev
- if step is None:
- yield 's[%d]' % i
- else:
- yield _genslice(start, i, step)
-
-def _assert_compress(inp, exp):
- res = list(compress(inp))
- if res != exp:
- print('Got %r, expected %r' % (res, exp))
- assert res == exp
-_assert_compress([0,2,4,6], ['s[0]', 's[2]', 's[4]', 's[6]'])
-_assert_compress([0,1,2,4,6,7], ['s[:3]', 's[4]', 's[6:8]'])
-_assert_compress([8,0,1,2,4,7,6,9], ['s[8]', 's[:3]', 's[4]', 's[7:5:-1]', 's[9]'])
-
-def gen(wrong, right, indent):
- code = ' + '.join(find_matching(wrong, right))
- return 'if len(s) == %d:\n%s return %s\n' % (len(wrong), indent, code)
-
-def genall(tests):
- indent = ' ' * 8
- return indent + (indent + 'el').join(gen(wrong, right, indent) for wrong,right in tests)
-
-def main():
- print(genall(tests))
- print(u' Age gate:')
- print(genall(tests_age_gate))
-
-if __name__ == '__main__':
- main()
From 0a60edcfa975e4f791923574b3f888e3ffe72c43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 27 Sep 2013 14:19:19 +0200
Subject: [PATCH 168/215] Don't fail if the video thumbnail couldn't be
downloaded (fixes #1516)
Just report a warning
---
youtube_dl/YoutubeDL.py | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index a3a351ee6..44a272e7e 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -545,11 +545,15 @@ class YoutubeDL(object):
thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
(info_dict['extractor'], info_dict['id']))
- uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
- with open(thumb_filename, 'wb') as thumbf:
- shutil.copyfileobj(uf, thumbf)
- self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_filename))
+ try:
+ uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+ with open(thumb_filename, 'wb') as thumbf:
+ shutil.copyfileobj(uf, thumbf)
+ self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
+ (info_dict['extractor'], info_dict['id'], thumb_filename))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_warning(u'Unable to download thumbnail "%s": %s' %
+ (info_dict['thumbnail'], compat_str(err)))
if not self.params.get('skip_download', False):
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
From 2dc592991aac5e0b3b91e3d2123490184033177e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 27 Sep 2013 14:20:52 +0200
Subject: [PATCH 169/215] [youtube] update description of test
---
youtube_dl/extractor/youtube.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 89c41efe5..9aee2ebf2 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -361,7 +361,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"info_dict": {
u"upload_date": u"20120506",
u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]",
- u"description": u"md5:bdac09887d209a4ed54b8f76b2bdaa8b",
+ u"description": u"md5:5b292926389560516e384ac437c0ec07",
u"uploader": u"Icona Pop",
u"uploader_id": u"IconaPop"
}
From f490e77e77c9db082e073f002088d021b16513ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 27 Sep 2013 14:22:36 +0200
Subject: [PATCH 170/215] [youtube] Set the thumbnail to None if it can't be
extracted
---
youtube_dl/extractor/youtube.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9aee2ebf2..618d87515 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1360,7 +1360,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
video_thumbnail = m_thumb.group(1)
elif 'thumbnail_url' not in video_info:
self._downloader.report_warning(u'unable to extract video thumbnail')
- video_thumbnail = ''
+ video_thumbnail = None
else: # don't panic if we can't find it
video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0])
From 9abb32045a85e1ecc831c624494ad41af3997e20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Fri, 27 Sep 2013 15:06:27 +0200
Subject: [PATCH 171/215] [youtube] Add hlsvp to the error message if it can't
be found and remove the live stream test
It's no longer available, other olympics streams have the same problem.
---
youtube_dl/extractor/youtube.py | 17 +----------------
1 file changed, 1 insertion(+), 16 deletions(-)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 618d87515..53f13b516 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -378,21 +378,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
u"uploader_id": u"justintimberlakeVEVO"
}
},
- {
- u'url': u'https://www.youtube.com/watch?v=TGi3HqYrWHE',
- u'file': u'TGi3HqYrWHE.mp4',
- u'note': u'm3u8 video',
- u'info_dict': {
- u'title': u'Triathlon - Men - London 2012 Olympic Games',
- u'description': u'- Men - TR02 - Triathlon - 07 August 2012 - London 2012 Olympic Games',
- u'uploader': u'olympic',
- u'upload_date': u'20120807',
- u'uploader_id': u'olympic',
- },
- u'params': {
- u'skip_download': True,
- },
- },
]
@@ -1480,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
return
else:
- raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
+ raise ExtractorError(u'no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
results = []
for format_param, video_real_url in video_url_list:
From 0b7c2485b66d53ad14bc331e867927b370599e43 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sat, 28 Sep 2013 15:43:34 +0200
Subject: [PATCH 172/215] [zdf] Add support for hash URLs and simplify (#1518)
---
youtube_dl/extractor/zdf.py | 76 +++++++++++++++++++++----------------
1 file changed, 43 insertions(+), 33 deletions(-)
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
index 418509cb9..faed7ff7f 100644
--- a/youtube_dl/extractor/zdf.py
+++ b/youtube_dl/extractor/zdf.py
@@ -2,16 +2,14 @@ import re
from .common import InfoExtractor
from ..utils import (
+ determine_ext,
ExtractorError,
- unescapeHTML,
)
+
class ZDFIE(InfoExtractor):
- _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek\/(.*beitrag\/video\/)(?P[^/\?]+)(?:\?.*)?'
- _TITLE = r'(?P.*)
'
+ _VALID_URL = r'^http://www\.zdf\.de\/ZDFmediathek(?P#)?\/(.*beitrag\/video\/)(?P[^/\?]+)(?:\?.*)?'
_MEDIA_STREAM = r''
- _MMS_STREAM = r'href="(?Pmms://[^"]*)"'
- _RTSP_STREAM = r'(?Prtsp://[^"]*.mp4)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -19,6 +17,9 @@ class ZDFIE(InfoExtractor):
raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('video_id')
+ if mobj.group('hash'):
+ url = url.replace(u'#', u'', 1)
+
html = self._download_webpage(url, video_id)
streams = [m.groupdict() for m in re.finditer(self._MEDIA_STREAM, html)]
if streams is None:
@@ -27,39 +28,48 @@ class ZDFIE(InfoExtractor):
# s['media_type'] == 'wstreaming' -> use 'Windows Media Player' and mms url
# s['media_type'] == 'hstreaming' -> use 'Quicktime' and rtsp url
# choose first/default media type and highest quality for now
- for s in streams: #find 300 - dsl1000mbit
- if s['quality'] == '300' and s['media_type'] == 'wstreaming':
- stream_=s
- break
- for s in streams: #find veryhigh - dsl2000mbit
- if s['quality'] == 'veryhigh' and s['media_type'] == 'wstreaming': # 'hstreaming' - rtsp is not working
- stream_=s
- break
- if stream_ is None:
+ def stream_pref(s):
+ TYPE_ORDER = ['ostreaming', 'hstreaming', 'wstreaming']
+ try:
+ type_pref = TYPE_ORDER.index(s['media_type'])
+ except ValueError:
+ type_pref = 999
+
+ QUALITY_ORDER = ['veryhigh', '300']
+ try:
+ quality_pref = QUALITY_ORDER.index(s['quality'])
+ except ValueError:
+ quality_pref = 999
+
+ return (type_pref, quality_pref)
+
+ sorted_streams = sorted(streams, key=stream_pref)
+ if not sorted_streams:
raise ExtractorError(u'No stream found.')
+ stream = sorted_streams[0]
- media_link = self._download_webpage(stream_['video_url'], video_id,'Get stream URL')
+ media_link = self._download_webpage(
+ stream['video_url'],
+ video_id,
+ u'Get stream URL')
- self.report_extraction(video_id)
- mobj = re.search(self._TITLE, html)
+ MMS_STREAM = r'href="(?Pmms://[^"]*)"'
+ RTSP_STREAM = r'(?Prtsp://[^"]*.mp4)'
+
+ mobj = re.search(self._MEDIA_STREAM, media_link)
if mobj is None:
- raise ExtractorError(u'Cannot extract title')
- title = unescapeHTML(mobj.group('title'))
-
- mobj = re.search(self._MMS_STREAM, media_link)
- if mobj is None:
- mobj = re.search(self._RTSP_STREAM, media_link)
+ mobj = re.search(RTSP_STREAM, media_link)
if mobj is None:
raise ExtractorError(u'Cannot extract mms:// or rtsp:// URL')
- mms_url = mobj.group('video_url')
+ video_url = mobj.group('video_url')
- mobj = re.search('(.*)[.](?P[^.]+)', mms_url)
- if mobj is None:
- raise ExtractorError(u'Cannot extract extention')
- ext = mobj.group('ext')
+ title = self._html_search_regex(
+ r'(.*?)
',
+ html, u'title')
- return [{'id': video_id,
- 'url': mms_url,
- 'title': title,
- 'ext': ext
- }]
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'ext': determine_ext(video_url)
+ }
From 9c15e9de849641143e7654f2656c68e066fe9e2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sat, 28 Sep 2013 21:19:52 +0200
Subject: [PATCH 173/215] [yahoo] Fix video extraction (fixes #1521)
There's no need to use two different methods.
Now we can also download videos over http if possible.
Also run the test for rtmp videos, but skip the download.
---
youtube_dl/extractor/yahoo.py | 132 +++++++++++++++++-----------------
1 file changed, 65 insertions(+), 67 deletions(-)
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
index 32d5b9477..39126e631 100644
--- a/youtube_dl/extractor/yahoo.py
+++ b/youtube_dl/extractor/yahoo.py
@@ -1,4 +1,3 @@
-import datetime
import itertools
import json
import re
@@ -6,86 +5,85 @@ import re
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
compat_urllib_parse,
-
- ExtractorError,
+ compat_urlparse,
+ determine_ext,
+ clean_html,
)
+
class YahooIE(InfoExtractor):
IE_DESC = u'Yahoo screen'
_VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P\d*?)\.html'
- _TEST = {
- u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
- u'file': u'214727115.flv',
- u'md5': u'2e717f169c1be93d84d3794a00d4a325',
- u'info_dict': {
- u"title": u"Julian Smith & Travis Legg Watch Julian Smith"
+ _TESTS = [
+ {
+ u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
+ u'file': u'214727115.mp4',
+ u'info_dict': {
+ u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
+ u'description': u'Julian and Travis watch Julian Smith',
+ },
},
- u'skip': u'Requires rtmpdump'
- }
+ {
+ u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
+ u'file': u'103000935.flv',
+ u'info_dict': {
+ u'title': u'The Cougar Lies with Spanish Moss',
+ u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
+ },
+ u'params': {
+ # Requires rtmpdump
+ u'skip_download': True,
+ },
+ },
+ ]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError(u'Invalid URL: %s' % url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id)
- m_id = re.search(r'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P.+?)";', webpage)
- if m_id is None:
- # TODO: Check which url parameters are required
- info_url = 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
- webpage = self._download_webpage(info_url, video_id, u'Downloading info webpage')
- info_re = r'''.*?)\]\]>.*
- .*?)\]\]>.*
- .*?)\ .*\]\]>.*
-
Date: Sun, 29 Sep 2013 12:44:02 +0200
Subject: [PATCH 174/215] [dailymotion] Disable the family filter in the
playlists (fixes #1524)
---
youtube_dl/extractor/dailymotion.py | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index 64b89aae8..3f012aedc 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -14,8 +14,15 @@ from ..utils import (
ExtractorError,
)
+class DailymotionBaseInfoExtractor(InfoExtractor):
+ @staticmethod
+ def _build_request(url):
+ """Build a request with the family filter disabled"""
+ request = compat_urllib_request.Request(url)
+ request.add_header('Cookie', 'family_filter=off')
+ return request
-class DailymotionIE(SubtitlesInfoExtractor):
+class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
"""Information Extractor for Dailymotion"""
_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/(?:embed/)?video/([^/]+)'
@@ -40,8 +47,7 @@ class DailymotionIE(SubtitlesInfoExtractor):
url = 'http://www.dailymotion.com/video/%s' % video_id
# Retrieve video webpage to extract further information
- request = compat_urllib_request.Request(url)
- request.add_header('Cookie', 'family_filter=off')
+ request = self._build_request(url)
webpage = self._download_webpage(request, video_id)
# Extract URL, uploader and title from webpage
@@ -113,7 +119,7 @@ class DailymotionIE(SubtitlesInfoExtractor):
return {}
-class DailymotionPlaylistIE(InfoExtractor):
+class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = u'dailymotion:playlist'
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/'
_MORE_PAGES_INDICATOR = r''
@@ -122,7 +128,8 @@ class DailymotionPlaylistIE(InfoExtractor):
def _extract_entries(self, id):
video_ids = []
for pagenum in itertools.count(1):
- webpage = self._download_webpage(self._PAGE_TEMPLATE % (id, pagenum),
+ request = self._build_request(self._PAGE_TEMPLATE % (id, pagenum))
+ webpage = self._download_webpage(request,
id, u'Downloading page %s' % pagenum)
playlist_el = get_element_by_attribute(u'class', u'video_list', webpage)
From 46353f6783b9e468c9271c864f0711c85d3cea33 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 29 Sep 2013 11:17:38 +0200
Subject: [PATCH 175/215] [update] Look for .exe extension on Windows (Fixes
#745)
---
youtube_dl/__init__.py | 2 +-
youtube_dl/update.py | 10 ++++++++--
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 3851fc0a6..28a7bdd92 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -658,7 +658,7 @@ def _real_main(argv=None):
# Update version
if opts.update_self:
- update_self(ydl.to_screen, opts.verbose, sys.argv[0])
+ update_self(ydl.to_screen, opts.verbose)
# Maybe do nothing
if len(all_urls) < 1:
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index ccab6f27f..669b59a68 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -1,6 +1,7 @@
import json
import traceback
import hashlib
+import sys
from zipimport import zipimporter
from .utils import *
@@ -34,7 +35,7 @@ def rsa_verify(message, signature, key):
if signature != sha256(message).digest(): return False
return True
-def update_self(to_screen, verbose, filename):
+def update_self(to_screen, verbose):
"""Update the program file with the latest version from the repository"""
UPDATE_URL = "http://rg3.github.io/youtube-dl/update/"
@@ -42,7 +43,6 @@ def update_self(to_screen, verbose, filename):
JSON_URL = UPDATE_URL + 'versions.json'
UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
-
if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, "frozen"):
to_screen(u'It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
return
@@ -80,6 +80,12 @@ def update_self(to_screen, verbose, filename):
print_notes(to_screen, versions_info['versions'])
+ filename = sys.argv[0]
+ # Py2EXE: Filename could be different
+ if hasattr(sys, "frozen") and not os.path.isfile(filename):
+ if os.path.isfile(filename + u'.exe'):
+ filename += u'.exe'
+
if not os.access(filename, os.W_OK):
to_screen(u'ERROR: no write permissions on %s' % filename)
return
From d27903703673e565a3a1e8dd418d1347ef331b3e Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 29 Sep 2013 11:26:01 +0200
Subject: [PATCH 176/215] [update] Prevent cmd window popup on Windows (Fixes
#1478)
---
youtube_dl/update.py | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
index 669b59a68..0689a4891 100644
--- a/youtube_dl/update.py
+++ b/youtube_dl/update.py
@@ -1,6 +1,8 @@
+import io
import json
import traceback
import hashlib
+import subprocess
import sys
from zipimport import zipimporter
@@ -75,8 +77,9 @@ def update_self(to_screen, verbose):
to_screen(u'ERROR: the versions file signature is invalid. Aborting.')
return
- to_screen(u'Updating to version ' + versions_info['latest'] + '...')
- version = versions_info['versions'][versions_info['latest']]
+ version_id = versions_info['latest']
+ to_screen(u'Updating to version ' + version_id + '...')
+ version = versions_info['versions'][version_id]
print_notes(to_screen, versions_info['versions'])
@@ -122,16 +125,18 @@ def update_self(to_screen, verbose):
try:
bat = os.path.join(directory, 'youtube-dl-updater.bat')
- b = open(bat, 'w')
- b.write("""
-echo Updating youtube-dl...
+ with io.open(bat, 'w') as batfile:
+ batfile.write(u"""
+@echo off
+echo Waiting for file handle to be closed ...
ping 127.0.0.1 -n 5 -w 1000 > NUL
-move /Y "%s.new" "%s"
-del "%s"
- \n""" %(exe, exe, bat))
- b.close()
+move /Y "%s.new" "%s" > NUL
+echo Updated youtube-dl to version %s.
+start /b "" cmd /c del "%%~f0"&exit /b"
+ \n""" % (exe, exe, version_id))
- os.startfile(bat)
+ subprocess.Popen([bat]) # Continues to run in the background
+ return # Do not show premature success messages
except (IOError, OSError) as err:
if verbose: to_screen(compat_str(traceback.format_exc()))
to_screen(u'ERROR: unable to overwrite current version')
From 138a5454b5f2af27b0b31764a8125cad23fd3429 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister
Date: Sun, 29 Sep 2013 14:38:37 +0200
Subject: [PATCH 177/215] release 2013.09.29
---
youtube_dl/version.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 8e6356dab..e3e5d5538 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.09.24.2'
+__version__ = '2013.09.29'
From 843530568f326294d714b5b9f11bbf6176d73ccf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sun, 29 Sep 2013 20:49:58 +0200
Subject: [PATCH 178/215] [appletrailers] Rework extraction (fixes #1387)
The extraction was broken:
* The includes page contains img elements that need to be fixed.
* Use the 'itunes.inc' page, it contains a json dictionary for each trailer with information.
* Get the formats from 'includes/settings{trailer_name}.json'
* Use urljoin to allow URLs with a fragment identifier to work
Removed the thumbnail urls from the tests, they are different now.
---
youtube_dl/extractor/appletrailers.py | 112 ++++++++++----------------
1 file changed, 42 insertions(+), 70 deletions(-)
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index 8b191c196..b86c4b909 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -1,8 +1,10 @@
import re
import xml.etree.ElementTree
+import json
from .common import InfoExtractor
from ..utils import (
+ compat_urlparse,
determine_ext,
)
@@ -14,10 +16,9 @@ class AppleTrailersIE(InfoExtractor):
u"playlist": [
{
u"file": u"manofsteel-trailer4.mov",
- u"md5": u"11874af099d480cc09e103b189805d5f",
+ u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8",
u"info_dict": {
u"duration": 111,
- u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
u"title": u"Trailer 4",
u"upload_date": u"20130523",
u"uploader_id": u"wb",
@@ -25,10 +26,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-trailer3.mov",
- u"md5": u"07a0a262aae5afe68120eed61137ab34",
+ u"md5": u"b8017b7131b721fb4e8d6f49e1df908c",
u"info_dict": {
u"duration": 182,
- u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
u"title": u"Trailer 3",
u"upload_date": u"20130417",
u"uploader_id": u"wb",
@@ -36,10 +36,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-trailer.mov",
- u"md5": u"e401fde0813008e3307e54b6f384cff1",
+ u"md5": u"d0f1e1150989b9924679b441f3404d48",
u"info_dict": {
u"duration": 148,
- u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
u"title": u"Trailer",
u"upload_date": u"20121212",
u"uploader_id": u"wb",
@@ -47,10 +46,9 @@ class AppleTrailersIE(InfoExtractor):
},
{
u"file": u"manofsteel-teaser.mov",
- u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
+ u"md5": u"5fe08795b943eb2e757fa95cb6def1cb",
u"info_dict": {
u"duration": 93,
- u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
u"title": u"Teaser",
u"upload_date": u"20120721",
u"uploader_id": u"wb",
@@ -59,87 +57,61 @@ class AppleTrailersIE(InfoExtractor):
]
}
+ _JSON_RE = r'iTunes.playURL\((.*?)\);'
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
movie = mobj.group('movie')
uploader_id = mobj.group('company')
- playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
+ playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
playlist_snippet = self._download_webpage(playlist_url, movie)
-        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
+        playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
+        playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
+ # The ' in the onClick attributes are not escaped, it couldn't be parsed
+ # with xml.etree.ElementTree.fromstring
+ # like: http://trailers.apple.com/trailers/wb/gravity/
+ def _clean_json(m):
+            return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+ playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
         playlist_html = u'<html>' + playlist_cleaned + u'</html>'
- size_cache = {}
-
doc = xml.etree.ElementTree.fromstring(playlist_html)
playlist = []
for li in doc.findall('./div/ul/li'):
- title = li.find('.//h3').text
+ on_click = li.find('.//a').attrib['onClick']
+ trailer_info_json = self._search_regex(self._JSON_RE,
+ on_click, u'trailer info')
+ trailer_info = json.loads(trailer_info_json)
+ title = trailer_info['title']
video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
thumbnail = li.find('.//img').attrib['src']
+ upload_date = trailer_info['posted'].replace('-', '')
- date_el = li.find('.//p')
- upload_date = None
-            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
- if m:
- upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
- runtime_el = date_el.find('./br')
-            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
+ runtime = trailer_info['runtime']
+            m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
duration = None
if m:
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+ first_url = trailer_info['url']
+ trailer_id = first_url.split('/')[-1].rpartition('_')[0]
+ settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
+ settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
+ settings = json.loads(settings_json)
+
formats = []
- for formats_el in li.findall('.//a'):
- if formats_el.attrib['class'] != 'OverlayPanel':
- continue
- target = formats_el.attrib['target']
-
- format_code = formats_el.text
- if 'Automatic' in format_code:
- continue
-
- size_q = formats_el.attrib['href']
- size_id = size_q.rpartition('#videos-')[2]
- if size_id not in size_cache:
- size_url = url + size_q
- sizepage_html = self._download_webpage(
- size_url, movie,
- note=u'Downloading size info %s' % size_id,
- errnote=u'Error while downloading size info %s' % size_id,
- )
- _doc = xml.etree.ElementTree.fromstring(sizepage_html)
- size_cache[size_id] = _doc
-
- sizepage_doc = size_cache[size_id]
- links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
- for vid_a in links:
- href = vid_a.get('href')
- if not href.endswith(target):
- continue
- detail_q = href.partition('#')[0]
- detail_url = url + '/' + detail_q
-
- m = re.match(r'includes/(?P[^/]+)/', detail_q)
- detail_id = m.group('detail_id')
-
- detail_html = self._download_webpage(
- detail_url, movie,
- note=u'Downloading detail %s %s' % (detail_id, size_id),
- errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
- )
- detail_doc = xml.etree.ElementTree.fromstring(detail_html)
- movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
- assert movie_link_el.get('class') == 'movieLink'
- movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
- ext = determine_ext(movie_link)
- assert ext == 'mov'
-
- formats.append({
- 'format': format_code,
- 'ext': ext,
- 'url': movie_link,
- })
+ for format in settings['metadata']['sizes']:
+ # The src is a file pointing to the real video file
+ format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
+ formats.append({
+ 'url': format_url,
+ 'ext': determine_ext(format_url),
+ 'format': format['type'],
+ 'width': format['width'],
+ 'height': int(format['height']),
+ })
+ formats = sorted(formats, key=lambda f: (f['height'], f['width']))
info = {
'_type': 'video',
From bb4aa62cf7ad3d5aae4edf56ab8954c80a2d8956 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sun, 29 Sep 2013 20:59:19 +0200
Subject: [PATCH 179/215] [appletrailers] The request for the settings must
have the trailer name in lower case (fixes #1329)
---
youtube_dl/extractor/appletrailers.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
index b86c4b909..6d6237f8a 100644
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -95,7 +95,7 @@ class AppleTrailersIE(InfoExtractor):
duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
first_url = trailer_info['url']
- trailer_id = first_url.split('/')[-1].rpartition('_')[0]
+ trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
settings = json.loads(settings_json)
From 722076a123c60ed6d5a978c4bc2609f46c8e3ee9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
Date: Sun, 29 Sep 2013 23:07:26 +0200
Subject: [PATCH 180/215] [rtlnow] Replace one of the tests
The video is no longer available.
---
youtube_dl/extractor/rtlnow.py | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
index e6fa0475e..32541077f 100644
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -63,13 +63,13 @@ class RTLnowIE(InfoExtractor):
},
},
{
- u'url': u'http://www.rtlnitronow.de/recht-ordnung/fahrradpolizei-koeln-fischereiaufsicht-ruegen.php?film_id=124311&player=1&season=1',
- u'file': u'124311.flv',
+ u'url': u'http://www.rtlnitronow.de/recht-ordnung/lebensmittelkontrolle-erlangenordnungsamt-berlin.php?film_id=127367&player=1&season=1',
+ u'file': u'127367.flv',
u'info_dict': {
- u'upload_date': u'20130830',
- u'title': u'Recht & Ordnung - Fahrradpolizei Köln & Fischereiaufsicht Rügen',
- u'description': u'Fahrradpolizei Köln & Fischereiaufsicht Rügen',
- u'thumbnail': u'http://autoimg.static-fra.de/nitronow/338273/1500x1500/image2.jpg'
+ u'upload_date': u'20130926',
+ u'title': u'Recht & Ordnung - Lebensmittelkontrolle Erlangen/Ordnungsamt...',
+ u'description': u'Lebensmittelkontrolle Erlangen/Ordnungsamt Berlin',
+ u'thumbnail': u'http://autoimg.static-fra.de/nitronow/344787/1500x1500/image2.jpg',
},
u'params': {
u'skip_download': True,
From 47192f92d801f38c0a608ca9c6cecc682ab2ecc6 Mon Sep 17 00:00:00 2001
From: Filippo Valsorda
Date: Mon, 30 Sep 2013 16:26:25 -0400
Subject: [PATCH 181/215] implement --no-playlist to only download current
video - closes #755
---
README.md | 1 +
youtube_dl/YoutubeDL.py | 1 +
youtube_dl/__init__.py | 2 ++
youtube_dl/extractor/youtube.py | 13 ++++++++++++-
4 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/README.md b/README.md
index fc8070c37..66a483b76 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,7 @@ which means you can modify it, redistribute it or use it however you like.
--date DATE download only videos uploaded in this date
--datebefore DATE download only videos uploaded before this date
--dateafter DATE download only videos uploaded after this date
+ --no-playlist download only the currently playing video
## Download Options:
-r, --rate-limit LIMIT maximum download rate (e.g. 50k or 44.6m)
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 44a272e7e..2503fd09b 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -83,6 +83,7 @@ class YoutubeDL(object):
skip_download: Skip the actual download of the video file
cachedir: Location of the cache files in the filesystem.
None to disable filesystem cache.
+ noplaylist: Download single video instead of a playlist if in doubt.
The following parameters are not used by YoutubeDL itself, they are used by
the FileDownloader:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 28a7bdd92..c9e75eab4 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -187,6 +187,7 @@ def parseOpts(overrideArguments=None):
selection.add_option('--date', metavar='DATE', dest='date', help='download only videos uploaded in this date', default=None)
selection.add_option('--datebefore', metavar='DATE', dest='datebefore', help='download only videos uploaded before this date', default=None)
selection.add_option('--dateafter', metavar='DATE', dest='dateafter', help='download only videos uploaded after this date', default=None)
+ selection.add_option('--no-playlist', action='store_true', dest='noplaylist', help='download only the currently playing video', default=False)
authentication.add_option('-u', '--username',
@@ -599,6 +600,7 @@ def _real_main(argv=None):
'progress_with_newline': opts.progress_with_newline,
'playliststart': opts.playliststart,
'playlistend': opts.playlistend,
+ 'noplaylist': opts.noplaylist,
'logtostderr': opts.outtmpl == '-',
'consoletitle': opts.consoletitle,
'nopart': opts.nopart,
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 53f13b516..c6876c69f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -13,6 +13,7 @@ import struct
import traceback
import xml.etree.ElementTree
import zlib
+import urlparse
from .common import InfoExtractor, SearchInfoExtractor
from .subtitles import SubtitlesInfoExtractor
@@ -1523,9 +1524,19 @@ class YoutubePlaylistIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
if mobj is None:
raise ExtractorError(u'Invalid URL: %s' % url)
+ playlist_id = mobj.group(1) or mobj.group(2)
+
+ # Check if it's a video-specific URL
+ query_dict = urlparse.parse_qs(urlparse.urlparse(url).query)
+ if 'v' in query_dict:
+ video_id = query_dict['v'][0]
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
+ return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
+ else:
+ self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
# Download playlist videos from API
- playlist_id = mobj.group(1) or mobj.group(2)
videos = []
for page_num in itertools.count(1):
From d4d9920a2630ef6c44cffa1b923e41291b44b5f0 Mon Sep 17 00:00:00 2001
From: Filippo Valsorda
Date: Mon, 30 Sep 2013 18:01:17 -0400
Subject: [PATCH 182/215] add test for --no-playlist
---
test/test_youtube_lists.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index dd9e292b0..f28fe78e0 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -27,6 +27,13 @@ class TestYoutubeLists(unittest.TestCase):
ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])
+ def test_youtube_playlist_noplaylist(self):
+ dl = FakeYDL()
+ dl.params['noplaylist'] = True
+ ie = YoutubePlaylistIE(dl)
+ result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
+ self.assertEqual(result['_type'], 'url')
+
def test_issue_673(self):
dl = FakeYDL()
ie = YoutubePlaylistIE(dl)
From fa556755930db77c62016a468d870e76608db012 Mon Sep 17 00:00:00 2001
From: Mark Oteiza
Date: Sun, 29 Sep 2013 22:50:46 -0400
Subject: [PATCH 183/215] Support XDG base directory specification
---
youtube_dl/__init__.py | 16 +++++++++++++---
youtube_dl/extractor/youtube.py | 8 ++++++--
2 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 28a7bdd92..95f75942a 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -113,6 +113,12 @@ def parseOpts(overrideArguments=None):
pass
return opts
+ xdg_cache_home = os.environ.get('XDG_CACHE_HOME')
+ if xdg_cache_home:
+ userCacheDir = os.path.join(xdg_cache_home, 'youtube-dl')
+ else:
+ userCacheDir = os.path.join(os.path.expanduser('~'), '.cache', 'youtube-dl')
+
max_width = 80
max_help_position = 80
@@ -168,7 +174,7 @@ def parseOpts(overrideArguments=None):
general.add_option('--proxy', dest='proxy', default=None, help='Use the specified HTTP/HTTPS proxy', metavar='URL')
general.add_option('--no-check-certificate', action='store_true', dest='no_check_certificate', default=False, help='Suppress HTTPS certificate validation.')
general.add_option(
- '--cache-dir', dest='cachedir', default=u'~/.youtube-dl/cache',
+ '--cache-dir', dest='cachedir', default=userCacheDir,
help='Location in the filesystem where youtube-dl can store downloaded information permanently. %default by default')
general.add_option(
'--no-cache-dir', action='store_const', const=None, dest='cachedir',
@@ -369,9 +375,13 @@ def parseOpts(overrideArguments=None):
else:
xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
if xdg_config_home:
- userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
+ userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
else:
- userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
+ userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
systemConf = _readOptions('/etc/youtube-dl.conf')
userConf = _readOptions(userConfFile)
commandLineConf = sys.argv[1:]
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 53f13b516..23e384ba2 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -420,8 +420,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
# Read from filesystem cache
func_id = '%s_%s_%d' % (player_type, player_id, slen)
assert os.path.basename(func_id) == func_id
- cache_dir = self._downloader.params.get('cachedir',
- u'~/.youtube-dl/cache')
+ xdg_cache_home = os.environ.get('XDG_CACHE_HOME')
+ if xdg_cache_home:
+ userCacheDir = os.path.join(xdg_cache_home, 'youtube-dl')
+ else:
+ userCacheDir = os.path.join(os.path.expanduser('~'), '.cache', 'youtube-dl')
+ cache_dir = self._downloader.params.get('cachedir', userCacheDir)
cache_enabled = cache_dir is not None
if cache_enabled:
From 4c62a16f4f4994c63e80eafcaeb5e6ff90305c38 Mon Sep 17 00:00:00 2001
From: rzhxeo
Date: Tue, 1 Oct 2013 06:55:30 +0200
Subject: [PATCH 184/215] [RTLnowIE] Add support for http://n-tvnow.de
---
youtube_dl/extractor/rtlnow.py | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
index 32541077f..fe66cc6e5 100644
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -8,8 +8,8 @@ from ..utils import (
)
class RTLnowIE(InfoExtractor):
- """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW and VOX NOW"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?rtlnitronow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+ """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
+ _VALID_URL = r'(?:http://)?(?P