From 056b56688a2ee5ba39dd5b2cbb003fd98529180f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 29 Mar 2014 15:55:03 +0700 Subject: [PATCH 01/41] [ntv] Simplify --- youtube_dl/extractor/ntv.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/ntv.py b/youtube_dl/extractor/ntv.py index e998d156e..8447a9b86 100644 --- a/youtube_dl/extractor/ntv.py +++ b/youtube_dl/extractor/ntv.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( - RegexNotFoundError, + ExtractorError, unescapeHTML ) @@ -98,16 +98,15 @@ class NTVIE(InfoExtractor): page = self._download_webpage(url, video_id, 'Downloading page') - def extract(patterns, name, page, fatal=False): - for pattern in patterns: - mobj = re.search(pattern, page) - if mobj: - return mobj.group(1) - if fatal: - raise RegexNotFoundError(u'Unable to extract %s' % name) - return None + for pattern in self._VIDEO_ID_REGEXES: + mobj = re.search(pattern, page) + if mobj: + break - video_id = extract(self._VIDEO_ID_REGEXES, 'video id', page, fatal=True) + if not mobj: + raise ExtractorError('No media links available for %s' % video_id) + + video_id = mobj.group(1) player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML') title = unescapeHTML(player.find('./data/title').text) From 2cc0082dc0fd35a99c06caa87377d60e1dc557a8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 29 Mar 2014 10:11:32 +0100 Subject: [PATCH 02/41] Credit @phaer for OE1 (#2646) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f3b2be0c1..6e00806e6 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -51,6 +51,7 @@ __authors__ = ( 'David Wagner', 'Juan C. Olivares', 'Mattias Harrysson', + 'phaer', ) __license__ = 'Public Domain' From 7e70ac36b314d100264e77d1374f50b709196d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 11:55:12 +0100 Subject: [PATCH 03/41] [bloomberg] Fix extraction (fixes #2154) Stop using the OoyalaIE, extract the f4m url instead. --- youtube_dl/extractor/bloomberg.py | 36 +++++++++++++++++++------------ 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py index 2415ce403..25fb79e14 100644 --- a/youtube_dl/extractor/bloomberg.py +++ b/youtube_dl/extractor/bloomberg.py @@ -1,22 +1,21 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor -from .ooyala import OoyalaIE class BloombergIE(InfoExtractor): _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P.+?)\.html' _TEST = { - u'url': u'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', - u'file': u'12bzhqZTqQHmmlA8I-i0NpzJgcG5NNYX.mp4', - u'info_dict': { - u'title': u'Shah\'s Presentation on Foreign-Exchange Strategies', - u'description': u'md5:abc86e5236f9f0e4866c59ad36736686', - }, - u'params': { - # Requires ffmpeg (m3u8 manifest) - u'skip_download': True, + 'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', + 'md5': '7bf08858ff7c203c870e8a6190e221e5', + 'info_dict': { + 'id': 'qurhIVlJSB6hzkVi229d8g', + 'ext': 'flv', + 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', + 'description': 'md5:0681e0d30dcdfc6abf34594961d8ea88', }, } @@ -24,7 +23,16 @@ class BloombergIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') webpage = self._download_webpage(url, name) - embed_code = self._search_regex( - r' Date: Sat, 29 Mar 2014 14:01:53 +0100 Subject: [PATCH 04/41] release 2014.03.29 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 154aeca05..764b52871 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.03.28' +__version__ = '2014.03.29' From f2bcdd8e02c43b04c1df7346fdfddb18dbf6070f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 13:52:43 +0100 Subject: [PATCH 05/41] [discovery] modernize --- youtube_dl/extractor/discovery.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 885944c5e..2ae6ecc12 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -10,9 +10,10 @@ class DiscoveryIE(InfoExtractor): _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P[a-zA-Z0-9\-]*)(.htm)?' _TEST = { 'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', - 'file': '614784.mp4', 'md5': 'e12614f9ee303a6ccef415cb0793eba2', 'info_dict': { + 'id': '614784', + 'ext': 'mp4', 'title': 'MythBusters: Mission Impossible Outtakes', 'description': ('Watch Jamie Hyneman and Adam Savage practice being' ' each other -- to the point of confusing Jamie\'s dog -- and ' @@ -34,7 +35,7 @@ class DiscoveryIE(InfoExtractor): formats = [] for f in info['mp4']: formats.append( - {'url': f['src'], r'ext': r'mp4', 'tbr': int(f['bitrate'][:-1])}) + {'url': f['src'], 'ext': 'mp4', 'tbr': int(f['bitrate'][:-1])}) return { 'id': info['contentId'], From 79bfd01001ece4629d5370ec9dbff77b0c187eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 14:20:53 +0100 Subject: [PATCH 06/41] [kickstarter] Fix extraction, extract more info and modernize --- youtube_dl/extractor/kickstarter.py | 46 +++++++++++++++-------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index 50bc883ef..961dd1aa6 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -1,37 +1,39 @@ +# encoding: utf-8 +from __future__ import unicode_literals + import re from .common import InfoExtractor class KickStarterIE(InfoExtractor): - _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P\d*)/.*' + _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P[^/]*)/.*' _TEST = { - u"url": u"https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location", - u"file": u"1404461844.mp4", - u"md5": u"c81addca81327ffa66c642b5d8b08cab", - u"info_dict": { - u"title": u"Intersection: The Story of Josh Grant by Kyle Cowling", + 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location', + 'md5': 'c81addca81327ffa66c642b5d8b08cab', + 'info_dict': { + 'id': '1404461844', + 'ext': 'mp4', + 'title': 'Intersection: The Story of Josh Grant by Kyle Cowling', + 'description': 'A unique motocross documentary that examines the ' + 'life and mind of one of sports most elite athletes: Josh Grant.', }, } def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('id') - webpage_src = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r'data-video="(.*?)">', - webpage_src, u'video URL') - if 'mp4' in video_url: - ext = 'mp4' - else: - ext = 'flv' - video_title = self._html_search_regex(r"(.*?)", - webpage_src, u'title').rpartition(u'\u2014 Kickstarter')[0].strip() + video_url = self._search_regex(r'data-video-url="(.*?)"', + webpage, 'video URL') + video_title = self._html_search_regex(r'(.*?)', + webpage, 'title').rpartition('— Kickstarter')[0].strip() - results = [{ - 'id': video_id, - 'url': video_url, - 'title': video_title, - 'ext': ext, - }] - return results + return { + 'id': video_id, + 'url': video_url, + 'title': video_title, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } From 40c716d2a2cb1473695f7ef87cc78fcedd22541a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 14:34:03 +0100 Subject: [PATCH 07/41] [ign] Modernize --- youtube_dl/extractor/ign.py | 102 ++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/youtube_dl/extractor/ign.py b/youtube_dl/extractor/ign.py index 381af91e4..cfeaa4146 100644 --- a/youtube_dl/extractor/ign.py +++ b/youtube_dl/extractor/ign.py @@ -1,10 +1,8 @@ +from __future__ import unicode_literals + import re -import json from .common import InfoExtractor -from ..utils import ( - determine_ext, -) class IGNIE(InfoExtractor): @@ -14,52 +12,57 @@ class IGNIE(InfoExtractor): """ _VALID_URL = r'https?://.+?\.ign\.com/(?Pvideos|show_videos|articles|(?:[^/]*/feature))(/.+)?/(?P.+)' - IE_NAME = u'ign.com' + IE_NAME = 'ign.com' _CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config' - _DESCRIPTION_RE = [r'(.+?)', - r'id="my_show_video">.*?

(.*?)

', - ] + _DESCRIPTION_RE = [ + r'(.+?)', + r'id="my_show_video">.*?

(.*?)

', + ] _TESTS = [ { - u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', - u'file': u'8f862beef863986b2785559b9e1aa599.mp4', - u'md5': u'eac8bdc1890980122c3b66f14bdd02e9', - u'info_dict': { - u'title': u'The Last of Us Review', - u'description': u'md5:c8946d4260a4d43a00d5ae8ed998870c', + 'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', + 'md5': 'eac8bdc1890980122c3b66f14bdd02e9', + 'info_dict': { + 'id': '8f862beef863986b2785559b9e1aa599', + 'ext': 'mp4', + 'title': 'The Last of Us Review', + 'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c', } }, { - u'url': u'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', - u'playlist': [ + 'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', + 'playlist': [ { - u'file': u'5ebbd138523268b93c9141af17bec937.mp4', - u'info_dict': { - u'title': u'GTA 5 Video Review', - u'description': u'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', + 'info_dict': { + 'id': '5ebbd138523268b93c9141af17bec937', + 'ext': 'mp4', + 'title': 'GTA 5 Video Review', + 'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.', }, }, { - u'file': u'638672ee848ae4ff108df2a296418ee2.mp4', - u'info_dict': { - u'title': u'26 Twisted Moments from GTA 5 in Slow Motion', - u'description': u'The twisted beauty of GTA 5 in stunning slow motion.', + 'info_dict': { + 'id': '638672ee848ae4ff108df2a296418ee2', + 'ext': 'mp4', + 'title': '26 Twisted Moments from GTA 5 in Slow Motion', + 'description': 'The twisted beauty of GTA 5 in stunning slow motion.', }, }, ], - u'params': { - u'skip_download': True, + 'params': { + 'skip_download': True, }, }, ] def _find_video_id(self, webpage): - res_id = [r'data-video-id="(.+?)"', - r'video)/id/(?P.+)' IE_NAME = '1up.com' _DESCRIPTION_RE = r'
(.+?)
' _TEST = { - u'url': u'http://gamevideos.1up.com/video/id/34976', - u'file': u'34976.mp4', - u'md5': u'68a54ce4ebc772e4b71e3123d413163d', - u'info_dict': { - u'title': u'Sniper Elite V2 - Trailer', - u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf', + 'url': 'http://gamevideos.1up.com/video/id/34976', + 'md5': '68a54ce4ebc772e4b71e3123d413163d', + 'info_dict': { + 'id': '34976', + 'ext': 'mp4', + 'title': 'Sniper Elite V2 - Trailer', + 'description': 'md5:5d289b722f5a6d940ca3136e9dae89cf', } } @@ -123,7 +122,6 @@ class OneUPIE(IGNIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - id = mobj.group('name_or_id') result = super(OneUPIE, self)._real_extract(url) - result['id'] = id + result['id'] = mobj.group('name_or_id') return result From 2583a0308bb11b355377f08d532b51f8db1d5316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 14:35:45 +0100 Subject: [PATCH 08/41] [huffpost] Modernize test --- youtube_dl/extractor/huffpost.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dl/extractor/huffpost.py index 0d1ea6802..94e7cf790 100644 --- a/youtube_dl/extractor/huffpost.py +++ b/youtube_dl/extractor/huffpost.py @@ -21,9 +21,10 @@ class HuffPostIE(InfoExtractor): _TEST = { 'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677', - 'file': '52dd3e4b02a7602131000677.mp4', 'md5': '55f5e8981c1c80a64706a44b74833de8', 'info_dict': { + 'id': '52dd3e4b02a7602131000677', + 'ext': 'mp4', 'title': 'Legalese It! with @MikeSacksHP', 'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ', 'duration': 1549, From 986f56736b4b51bab1bdea88883a33416cb0dede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 14:44:36 +0100 Subject: [PATCH 09/41] [roxwel] Modernize --- youtube_dl/extractor/roxwel.py | 52 ++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/roxwel.py b/youtube_dl/extractor/roxwel.py index d339e6cb5..41638c1d0 100644 --- a/youtube_dl/extractor/roxwel.py +++ b/youtube_dl/extractor/roxwel.py @@ -1,5 +1,6 @@ +from __future__ import unicode_literals + import re -import json from .common import InfoExtractor from ..utils import unified_strdate, determine_ext @@ -9,41 +10,44 @@ class RoxwelIE(InfoExtractor): _VALID_URL = r'https?://www\.roxwel\.com/player/(?P.+?)(\.|\?|$)' _TEST = { - u'url': u'http://www.roxwel.com/player/passionpittakeawalklive.html', - u'file': u'passionpittakeawalklive.flv', - u'md5': u'd9dea8360a1e7d485d2206db7fe13035', - u'info_dict': { - u'title': u'Take A Walk (live)', - u'uploader': u'Passion Pit', - u'description': u'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ', + 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', + 'info_dict': { + 'id': 'passionpittakeawalklive', + 'ext': 'flv', + 'title': 'Take A Walk (live)', + 'uploader': 'Passion Pit', + 'uploader_id': 'passionpit', + 'upload_date': '20120928', + 'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. ', }, - u'skip': u'Requires rtmpdump', + 'params': { + # rtmp download + 'skip_download': True, + } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) filename = mobj.group('filename') info_url = 'http://www.roxwel.com/api/videos/%s' % filename - info_page = self._download_webpage(info_url, filename, - u'Downloading video info') + info = self._download_json(info_url, filename) - self.report_extraction(filename) - info = json.loads(info_page) rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')]) best_rate = rtmp_rates[-1] url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate) - rtmp_url = self._download_webpage(url_page_url, filename, u'Downloading video url') + rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url') ext = determine_ext(rtmp_url) if ext == 'f4v': rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename) - return {'id': filename, - 'title': info['title'], - 'url': rtmp_url, - 'ext': 'flv', - 'description': info['description'], - 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), - 'uploader': info['artist'], - 'uploader_id': info['artistname'], - 'upload_date': unified_strdate(info['dbdate']), - } + return { + 'id': filename, + 'title': info['title'], + 'url': rtmp_url, + 'ext': 'flv', + 'description': info['description'], + 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), + 'uploader': info['artist'], + 'uploader_id': info['artistname'], + 'upload_date': unified_strdate(info['dbdate']), + } From 87a2566048838f132ceb31dda23e23b12740750c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 14:46:05 +0100 Subject: [PATCH 10/41] [metacritic] Modernize test --- youtube_dl/extractor/metacritic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py index 465ac4916..07f072924 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -13,8 +13,9 @@ class MetacriticIE(InfoExtractor): _TEST = { 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222', - 'file': '3698222.mp4', 'info_dict': { + 'id': '3698222', + 'ext': 'mp4', 'title': 'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors', 'description': 'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.', 'duration': 221, From 2ff7f8975e98e0e09513b6ba042da23348bacc41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 14:54:45 +0100 Subject: [PATCH 11/41] [nba] Modernize --- youtube_dl/extractor/nba.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nba.py b/youtube_dl/extractor/nba.py index 7e421610e..633b42f72 100644 --- a/youtube_dl/extractor/nba.py +++ b/youtube_dl/extractor/nba.py @@ -6,12 +6,13 @@ from .common import InfoExtractor class NBAIE(InfoExtractor): - _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*?)(?:/index\.html)?(?:\?.*)?$' + _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?:nba/)?video(?P/[^?]*?)(?:/index\.html)?(?:\?.*)?$' _TEST = { 'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html', - 'file': u'0021200253-okc-bkn-recap.nba.mp4', 'md5': u'c0edcfc37607344e2ff8f13c378c88a4', 'info_dict': { + 'id': '0021200253-okc-bkn-recap.nba', + 'ext': 'mp4', 'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.', 'title': 'Thunder vs. Nets', }, @@ -19,7 +20,7 @@ class NBAIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) @@ -33,7 +34,6 @@ class NBAIE(InfoExtractor): return { 'id': shortened_video_id, 'url': video_url, - 'ext': 'mp4', 'title': title, 'description': description, } From 2da67107ee3d80d67ed71963389f70c118cff0e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 15:05:15 +0100 Subject: [PATCH 12/41] [tf1] Modernize --- youtube_dl/extractor/tf1.py | 40 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/tf1.py b/youtube_dl/extractor/tf1.py index 2c5c88be8..fdae17b1b 100644 --- a/youtube_dl/extractor/tf1.py +++ b/youtube_dl/extractor/tf1.py @@ -1,33 +1,37 @@ # coding: utf-8 +from __future__ import unicode_literals -import json import re from .common import InfoExtractor + class TF1IE(InfoExtractor): """TF1 uses the wat.tv player.""" - _VALID_URL = r'http://videos\.tf1\.fr/.*-(.*?)\.html' + _VALID_URL = r'http://videos\.tf1\.fr/.*-(?P.*?)\.html' _TEST = { - u'url': u'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', - u'file': u'10635995.mp4', - u'md5': u'2e378cc28b9957607d5e88f274e637d8', - u'info_dict': { - u'title': u'Citroën Grand C4 Picasso 2013 : présentation officielle', - u'description': u'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.', + 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', + 'info_dict': { + 'id': '10635995', + 'ext': 'mp4', + 'title': 'Citroën Grand C4 Picasso 2013 : présentation officielle', + 'description': 'Vidéo officielle du nouveau Citroën Grand C4 Picasso, lancé à l\'automne 2013.', + }, + 'params': { + # Sometimes wat serves the whole file with the --test option + 'skip_download': True, }, - u'skip': u'Sometimes wat serves the whole file with the --test option', } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - id = mobj.group(1) - webpage = self._download_webpage(url, id) - embed_url = self._html_search_regex(r'"(https://www.wat.tv/embedframe/.*?)"', - webpage, 'embed url') - embed_page = self._download_webpage(embed_url, id, u'Downloading embed player page') + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + embed_url = self._html_search_regex( + r'"(https://www.wat.tv/embedframe/.*?)"', webpage, 'embed url') + embed_page = self._download_webpage(embed_url, video_id, + 'Downloading embed player page') wat_id = self._search_regex(r'UVID=(.*?)&', embed_page, 'wat id') - wat_info = self._download_webpage('http://www.wat.tv/interface/contentv3/%s' % wat_id, id, u'Downloading Wat info') - wat_info = json.loads(wat_info)['media'] - wat_url = wat_info['url'] - return self.url_result(wat_url, 'Wat') + wat_info = self._download_json( + 'http://www.wat.tv/interface/contentv3/%s' % wat_id, video_id) + return self.url_result(wat_info['media']['url'], 'Wat') From e79162558eca2e53a0cd5252102945bed7041601 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sat, 29 Mar 2014 15:15:16 +0100 Subject: [PATCH 13/41] [wat] Modernize --- youtube_dl/extractor/wat.py | 52 ++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 4fab6c6e8..a584e0896 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -1,37 +1,37 @@ # coding: utf-8 +from __future__ import unicode_literals -import json import re from .common import InfoExtractor - from ..utils import ( unified_strdate, ) class WatIE(InfoExtractor): - _VALID_URL=r'http://www\.wat\.tv/.*-(?P.*?)_.*?\.html' + _VALID_URL = r'http://www\.wat\.tv/.*-(?P.*?)_.*?\.html' IE_NAME = 'wat.tv' _TEST = { - u'url': u'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html', - u'file': u'10631273.mp4', - u'md5': u'd8b2231e1e333acd12aad94b80937e19', - u'info_dict': { - u'title': u'World War Z - Philadelphia VOST', - u'description': u'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr', + 'url': 'http://www.wat.tv/video/world-war-philadelphia-vost-6bv55_2fjr7_.html', + 'info_dict': { + 'id': '10631273', + 'ext': 'mp4', + 'title': 'World War Z - Philadelphia VOST', + 'description': 'La menace est partout. Que se passe-t-il à Philadelphia ?\r\nWORLD WAR Z, avec Brad Pitt, au cinéma le 3 juillet.\r\nhttp://www.worldwarz.fr', + }, + 'params': { + # Sometimes wat serves the whole file with the --test option + 'skip_download': True, }, - u'skip': u'Sometimes wat serves the whole file with the --test option', } - + def download_video_info(self, real_id): # 'contentv4' is used in the website, but it also returns the related # videos, we don't need them - info = self._download_webpage('http://www.wat.tv/interface/contentv3/' + real_id, real_id, 'Downloading video info') - info = json.loads(info) + info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id) return info['media'] - def _real_extract(self, url): def real_id_for_chapter(chapter): return chapter['tc_start'].split('-')[0] @@ -56,17 +56,17 @@ class WatIE(InfoExtractor): entries = [self.url_result(chapter_url) for chapter_url in chapter_urls] return self.playlist_result(entries, real_id, video_info['title']) + upload_date = None + if 'date_diffusion' in first_chapter: + upload_date = unified_strdate(first_chapter['date_diffusion']) # Otherwise we can continue and extract just one part, we have to use # the short id for getting the video url - info = {'id': real_id, - 'url': 'http://wat.tv/get/android5/%s.mp4' % real_id, - 'ext': 'mp4', - 'title': first_chapter['title'], - 'thumbnail': first_chapter['preview'], - 'description': first_chapter['description'], - 'view_count': video_info['views'], - } - if 'date_diffusion' in first_chapter: - info['upload_date'] = unified_strdate(first_chapter['date_diffusion']) - - return info + return { + 'id': real_id, + 'url': 'http://wat.tv/get/android5/%s.mp4' % real_id, + 'title': first_chapter['title'], + 'thumbnail': first_chapter['preview'], + 'description': first_chapter['description'], + 'view_count': video_info['views'], + 'upload_date': upload_date, + } From 62fec3b2fffd12949da6fe057ce08d5bab2b7db5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 06:02:41 +0200 Subject: [PATCH 14/41] Add new --encoding option (Fixes #2650) --- youtube_dl/YoutubeDL.py | 21 +++++++++++++++++++++ youtube_dl/__init__.py | 6 ++++-- youtube_dl/postprocessor/ffmpeg.py | 5 +++-- youtube_dl/utils.py | 1 - 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index ae0ec49f8..6646fe348 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -8,6 +8,7 @@ import datetime import errno import io import json +import locale import os import platform import re @@ -159,6 +160,7 @@ class YoutubeDL(object): include_ads: Download ads as well default_search: Prepend this string if an input url is not valid. 'auto' for elaborate guessing + encoding: Use this encoding instead of the system-specified. The following parameters are not used by YoutubeDL itself, they are used by the FileDownloader: @@ -1200,6 +1202,9 @@ class YoutubeDL(object): def print_debug_header(self): if not self.params.get('verbose'): return + + write_string('[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % + (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, self.get_encoding())) write_string('[debug] youtube-dl version ' + __version__ + '\n') try: sp = subprocess.Popen( @@ -1264,3 +1269,19 @@ class YoutubeDL(object): # (See https://github.com/rg3/youtube-dl/issues/1309 for details) opener.addheaders = [] self._opener = opener + + def encode(self, s): + if isinstance(s, bytes): + return s # Already encoded + + try: + return s.encode(self.get_encoding()) + except UnicodeEncodeError as err: + err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.' + raise + + def get_encoding(self): + encoding = self.params.get('encoding') + if encoding is None: + encoding = preferredencoding() + return encoding diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6e00806e6..4d3d6caed 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -263,6 +263,9 @@ def parseOpts(overrideArguments=None): '--ignore-config', action='store_true', help='Do not read configuration files. When given in the global configuration file /etc/youtube-dl.conf: do not read the user configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows)') + general.add_option( + '--encoding', dest='encoding', metavar='ENCODING', + help='Force the specified encoding (experimental)') selection.add_option( '--playlist-start', @@ -540,8 +543,6 @@ def parseOpts(overrideArguments=None): write_string(u'[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') write_string(u'[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') write_string(u'[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') - write_string(u'[debug] Encodings: locale %r, fs %r, out %r, pref: %r\n' % - (locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, preferredencoding())) return parser, opts, args @@ -786,6 +787,7 @@ def _real_main(argv=None): 'include_ads': opts.include_ads, 'default_search': opts.default_search, 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, + 'encoding': opts.encoding, } with YoutubeDL(ydl_opts) as ydl: diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index c22f2cdc6..98b5eccb4 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -53,8 +53,9 @@ class FFmpegPostProcessor(PostProcessor): if self._downloader.params.get('verbose', False): self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout,stderr = p.communicate() + bcmd = [self._downloader.encode(c) for c in cmd] + p = subprocess.Popen(bcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() if p.returncode != 0: stderr = stderr.decode('utf-8', 'replace') msg = stderr.strip().split('\n')[-1] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b5326c0cb..de9881372 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -539,7 +539,6 @@ def encodeFilename(s, for_subprocess=False): encoding = 'utf-8' return s.encode(encoding, 'ignore') - def decodeOption(optval): if optval is None: return optval From 2b25cb5d7693b62736d4cdfa656289cc429c4c81 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 07:02:58 +0200 Subject: [PATCH 15/41] [youtube] Move JavaScript interpreter into its own module --- youtube_dl/extractor/youtube.py | 110 ++----------------------------- youtube_dl/jsinterp.py | 113 ++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+), 106 deletions(-) create mode 100644 youtube_dl/jsinterp.py diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3a3a5a39e..2d1a19123 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -14,6 +14,7 @@ import zlib from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor +from ..jsinterp import JSInterpreter from ..utils import ( compat_chr, compat_parse_qs, @@ -438,113 +439,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( r'signature=([a-zA-Z]+)', jscode, - u'Initial JS player signature function name') + u'Initial JS player signature function name') - functions = {} - - def argidx(varname): - return string.lowercase.index(varname) - - def interpret_statement(stmt, local_vars, allow_recursion=20): - if allow_recursion < 0: - raise ExtractorError(u'Recursion limit reached') - - if stmt.startswith(u'var '): - stmt = stmt[len(u'var '):] - ass_m = re.match(r'^(?P[a-z]+)(?:\[(?P[^\]]+)\])?' + - r'=(?P.*)$', stmt) - if ass_m: - if ass_m.groupdict().get('index'): - def assign(val): - lvar = local_vars[ass_m.group('out')] - idx = interpret_expression(ass_m.group('index'), - local_vars, allow_recursion) - assert isinstance(idx, int) - lvar[idx] = val - return val - expr = ass_m.group('expr') - else: - def assign(val): - local_vars[ass_m.group('out')] = val - return val - expr = ass_m.group('expr') - elif stmt.startswith(u'return '): - assign = lambda v: v - expr = stmt[len(u'return '):] - else: - raise ExtractorError( - u'Cannot determine left side of statement in %r' % stmt) - - v = interpret_expression(expr, local_vars, allow_recursion) - return assign(v) - - def interpret_expression(expr, local_vars, allow_recursion): - if expr.isdigit(): - return int(expr) - - if expr.isalpha(): - return local_vars[expr] - - m = re.match(r'^(?P[a-z]+)\.(?P.*)$', expr) - if m: - member = m.group('member') - val = local_vars[m.group('in')] - if member == 'split("")': - return list(val) - if member == 'join("")': - return u''.join(val) - if member == 'length': - return len(val) - if member == 'reverse()': - return val[::-1] - slice_m = re.match(r'slice\((?P.*)\)', member) - if slice_m: - idx = interpret_expression( - slice_m.group('idx'), local_vars, allow_recursion-1) - return val[idx:] - - m = re.match( - r'^(?P[a-z]+)\[(?P.+)\]$', expr) - if m: - val = local_vars[m.group('in')] - idx = interpret_expression(m.group('idx'), local_vars, - allow_recursion-1) - return val[idx] - - m = re.match(r'^(?P.+?)(?P[%])(?P.+?)$', expr) - if m: - a = interpret_expression(m.group('a'), - local_vars, allow_recursion) - b = interpret_expression(m.group('b'), - local_vars, allow_recursion) - return a % b - - m = re.match( - r'^(?P[a-zA-Z$]+)\((?P[a-z0-9,]+)\)$', expr) - if m: - fname = m.group('func') - if fname not in functions: - functions[fname] = extract_function(fname) - argvals = [int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')] - return functions[fname](argvals) - raise ExtractorError(u'Unsupported JS expression %r' % expr) - - def extract_function(funcname): - func_m = re.search( - r'function ' + re.escape(funcname) + - r'\((?P[a-z,]+)\){(?P[^}]+)}', - jscode) - argnames = func_m.group('args').split(',') - - def resf(args): - local_vars = dict(zip(argnames, args)) - for stmt in func_m.group('code').split(';'): - res = interpret_statement(stmt, local_vars) - return res - return resf - - initial_function = extract_function(funcname) + jsi = JSInterpreter(jscode) + initial_function = jsi.extract_function(funcname) return lambda s: initial_function([s]) def _parse_sig_swf(self, file_contents): diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py new file mode 100644 index 000000000..129a4027b --- /dev/null +++ b/youtube_dl/jsinterp.py @@ -0,0 +1,113 @@ +from __future__ import unicode_literals + +import re + +from .utils import ( + ExtractorError, +) + + +class JSInterpreter(object): + def __init__(self, code): + self.code = code + self._functions = {} + + def interpret_statement(self, stmt, local_vars, allow_recursion=20): + if allow_recursion < 0: + raise ExtractorError('Recursion limit reached') + + if stmt.startswith('var '): + stmt = stmt[len('var '):] + ass_m = re.match(r'^(?P[a-z]+)(?:\[(?P[^\]]+)\])?' + + r'=(?P.*)$', stmt) + if ass_m: + if ass_m.groupdict().get('index'): + def assign(val): + lvar = local_vars[ass_m.group('out')] + idx = self.interpret_expression( + ass_m.group('index'), local_vars, allow_recursion) + assert isinstance(idx, int) + lvar[idx] = val + return val + expr = ass_m.group('expr') + else: + def assign(val): + local_vars[ass_m.group('out')] = val + return val + expr = ass_m.group('expr') + elif stmt.startswith('return '): + assign = lambda v: v + expr = stmt[len('return '):] + else: + raise ExtractorError( + 'Cannot determine left side of statement in %r' % stmt) + + v = self.interpret_expression(expr, local_vars, allow_recursion) + return assign(v) + + def interpret_expression(self, expr, local_vars, allow_recursion): + if expr.isdigit(): + return int(expr) + + if expr.isalpha(): + return local_vars[expr] + + m = re.match(r'^(?P[a-z]+)\.(?P.*)$', expr) + if m: + member = m.group('member') + val = local_vars[m.group('in')] + if member == 'split("")': + return list(val) + if member == 'join("")': + return u''.join(val) + if member == 'length': + return len(val) + if member == 'reverse()': + return val[::-1] + slice_m = re.match(r'slice\((?P.*)\)', member) + if slice_m: + idx = self.interpret_expression( + slice_m.group('idx'), local_vars, allow_recursion - 1) + return val[idx:] + + m = re.match( + r'^(?P[a-z]+)\[(?P.+)\]$', expr) + if m: + val = local_vars[m.group('in')] + idx = self.interpret_expression( + m.group('idx'), local_vars, allow_recursion - 1) + return val[idx] + + m = re.match(r'^(?P.+?)(?P[%])(?P.+?)$', expr) + if m: + a = self.interpret_expression( + m.group('a'), local_vars, allow_recursion) + b = self.interpret_expression( + m.group('b'), local_vars, allow_recursion) + return a % b + + m = re.match( + r'^(?P[a-zA-Z$]+)\((?P[a-z0-9,]+)\)$', expr) + if m: + fname = m.group('func') + if fname not in self._functions: + self._functions[fname] = self.extract_function(fname) + argvals = [int(v) if v.isdigit() else local_vars[v] + for v in m.group('args').split(',')] + return self._functions[fname](argvals) + raise ExtractorError('Unsupported JS expression %r' % expr) + + def extract_function(self, funcname): + func_m = re.search( + r'function ' + re.escape(funcname) + + r'\((?P[a-z,]+)\){(?P[^}]+)}', + self.code) + argnames = func_m.group('args').split(',') + + def resf(args): + local_vars = dict(zip(argnames, args)) + for stmt in func_m.group('code').split(';'): + res = self.interpret_statement(stmt, local_vars) + return res + return resf + From 77ffa957010fb7fe19433633223317d1147dd080 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 07:15:14 +0200 Subject: [PATCH 16/41] [jsinterp] Better error messages --- youtube_dl/jsinterp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 129a4027b..449482d3c 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -99,9 +99,12 @@ class JSInterpreter(object): def extract_function(self, funcname): func_m = re.search( - r'function ' + re.escape(funcname) + + (r'(?:function %s|%s\s*=\s*function)' % ( + re.escape(funcname), re.escape(funcname))) + r'\((?P[a-z,]+)\){(?P[^}]+)}', self.code) + if func_m is None: + raise ExtractorError('Could not find JS function %r' % funcname) argnames = func_m.group('args').split(',') def resf(args): From acd213ed6d15668a8bdf121cc93cd6a8f42f443b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 07:16:07 +0200 Subject: [PATCH 17/41] Remove unusued imports --- youtube_dl/extractor/appletrailers.py | 1 - youtube_dl/extractor/youtube.py | 1 - 2 files changed, 2 deletions(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index fc5d6825e..dc8657b67 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -6,7 +6,6 @@ import json from .common import InfoExtractor from ..utils import ( compat_urlparse, - determine_ext, ) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2d1a19123..e206392bb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -7,7 +7,6 @@ import itertools import json import os.path import re -import string import struct import traceback import zlib From cd7481a39eaeb3eadc8c4ace2f7063b28a6fa5d0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 07:25:42 +0200 Subject: [PATCH 18/41] [wdr] Add support for wdrmaus.de (Fixes #2651) --- youtube_dl/extractor/__init__.py | 5 +- youtube_dl/extractor/wdr.py | 86 +++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8e81fa619..a665d7f0f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -285,7 +285,10 @@ from .vk import VKIE from .vube import VubeIE from .washingtonpost import WashingtonPostIE from .wat import WatIE -from .wdr import WDRIE +from .wdr import ( + WDRIE, + WDRMausIE, +) from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 500b9146f..2048744e1 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -4,9 +4,10 @@ import re from .common import InfoExtractor from ..utils import ( - unified_strdate, + compat_parse_qs, compat_urlparse, determine_ext, + unified_strdate, ) @@ -111,4 +112,85 @@ class WDRIE(InfoExtractor): 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, - } \ No newline at end of file + } + + +class WDRMausIE(InfoExtractor): + _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:extras/|sachgeschichten/sachgeschichten/)?(?P[^/?#]+)(?:/index\.php5|\.php5|/(?:$|[?#]))' + IE_DESC = 'Sendung mit der Maus' + _TESTS = [{ + 'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5', + 'info_dict': { + 'id': 'aktuelle-sendung', + 'ext': 'mp4', + 'thumbnail': 're:^http://.+\.jpg', + 'upload_date': 're:^[0-9]{8}$', + 'title': 're:^[0-9.]{10} - Aktuelle Sendung$', + } + }, { + 'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5', + 'md5': '3b1227ca3ed28d73ec5737c65743b2a3', + 'info_dict': { + 'id': '40_jahre_maus', + 'ext': 'mp4', + 'thumbnail': 're:^http://.+\.jpg', + 'upload_date': '20131007', + 'title': '12.03.2011 - 40 Jahre Maus', + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + param_code = self._html_search_regex( + r'

Sendedatum:\s*([0-9\.]+)

', + webpage, 'air date') + title_str = self._html_search_regex( + r'

(.*?)

', webpage, 'title') + title = '%s - %s' % (title_date, title_str) + upload_date = unified_strdate( + self._html_search_meta('dc.date', webpage)) + + fields = compat_parse_qs(param_code) + video_url = fields['firstVideo'][0] + thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0]) + + formats = [{ + 'format_id': 'rtmp', + 'url': video_url, + }] + + jscode = self._download_webpage( + 'http://www.wdrmaus.de/codebase/js/extended-medien.min.js', + video_id, fatal=False, + note='Downloading URL translation table', + errnote='Could not download URL translation table') + if jscode: + for m in re.finditer( + r"stream:\s*'dslSrc=(?P[^']+)',\s*download:\s*'(?P
[^']+)'\s*\}", + jscode): + if video_url.startswith(m.group('stream')): + http_url = video_url.replace( + m.group('stream'), m.group('dl')) + formats.append({ + 'format_id': 'http', + 'url': http_url, + }) + break + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + } + +# TODO test _1 \ No newline at end of file From cbc4a6cc7e8de8d7103afcad1173a5e8910ad35a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 07:25:48 +0200 Subject: [PATCH 19/41] release 2014.03.30 --- README.md | 1 + youtube_dl/version.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ac1c3adad..5bb6c6e4e 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,7 @@ which means you can modify it, redistribute it or use it however you like. configuration in ~/.config/youtube-dl.conf (%APPDATA%/youtube-dl/config.txt on Windows) + --encoding ENCODING Force the specified encoding (experimental) ## Video Selection: --playlist-start NUMBER playlist video to start at (default is 1) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 764b52871..6192c4761 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.03.29' +__version__ = '2014.03.30' From 9a7b072e38ac6aafc346692a268e7a399a07c607 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 07:42:35 +0200 Subject: [PATCH 20/41] [wdr] Add support for more wdrmaus subpages --- youtube_dl/extractor/wdr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 2048744e1..63691aa67 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -116,7 +116,7 @@ class WDRIE(InfoExtractor): class WDRMausIE(InfoExtractor): - _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:extras/|sachgeschichten/sachgeschichten/)?(?P[^/?#]+)(?:/index\.php5|\.php5|/(?:$|[?#]))' + _VALID_URL = 'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P[^/?#]+)(?:/index\.php5|(? Date: Sun, 30 Mar 2014 15:35:07 +0700 Subject: [PATCH 21/41] [rutube] Modernize --- youtube_dl/extractor/rutube.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index 4922dd764..7c6607460 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -20,8 +20,9 @@ class RutubeIE(InfoExtractor): _TEST = { 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', - 'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4', 'info_dict': { + 'id': '3eac3b4561676c17df9132a9a1e62e3e', + 'ext': 'mp4', 'title': 'Раненный кенгуру забежал в аптеку', 'description': 'http://www.ntdtv.ru ', 'duration': 80, @@ -39,12 +40,14 @@ class RutubeIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id, - video_id, 'Downloading video JSON') + api_response = self._download_webpage( + 'http://rutube.ru/api/video/%s/?format=json' % video_id, + video_id, 'Downloading video JSON') video = json.loads(api_response) - api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, - video_id, 'Downloading trackinfo JSON') + api_response = self._download_webpage( + 'http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, + video_id, 'Downloading trackinfo JSON') trackinfo = json.loads(api_response) # Some videos don't have the author field From 83d548ef0fc451947806c7f21c542e0cd40b4e9d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 15:53:06 +0200 Subject: [PATCH 22/41] [youtube] Encode ytsearch query --- youtube_dl/extractor/youtube.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e206392bb..6384095f9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1609,7 +1609,7 @@ class YoutubeUserIE(InfoExtractor): class YoutubeSearchIE(SearchInfoExtractor): IE_DESC = u'YouTube.com searches' - _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' + _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc' _MAX_RESULTS = 1000 IE_NAME = u'youtube:search' _SEARCH_KEY = 'ytsearch' @@ -1620,9 +1620,12 @@ class YoutubeSearchIE(SearchInfoExtractor): video_ids = [] pagenum = 0 limit = n + PAGE_SIZE = 50 - while (50 * pagenum) < limit: - result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1) + while (PAGE_SIZE * pagenum) < limit: + result_url = self._API_URL % ( + compat_urllib_parse.quote_plus(query.encode('utf-8')), + (PAGE_SIZE * pagenum) + 1) data_json = self._download_webpage( result_url, video_id=u'query "%s"' % query, note=u'Downloading page %s' % (pagenum + 1), From 9c1fc022ae3e1232a31f79dbde1e85783f26fa6d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 15:57:31 +0200 Subject: [PATCH 23/41] [generic] Warn before fallback to automatic search --- youtube_dl/__init__.py | 9 +++++---- youtube_dl/extractor/generic.py | 7 +++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 4d3d6caed..7c135db32 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -256,9 +256,10 @@ def parseOpts(overrideArguments=None): general.add_option( '--bidi-workaround', dest='bidi_workaround', action='store_true', help=u'Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH') - general.add_option('--default-search', - dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') + general.add_option( + '--default-search', + dest='default_search', metavar='PREFIX', + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') general.add_option( '--ignore-config', action='store_true', @@ -676,7 +677,7 @@ def _real_main(argv=None): date = DateRange.day(opts.date) else: date = DateRange(opts.dateafter, opts.datebefore) - if opts.default_search not in ('auto', None) and ':' not in opts.default_search: + if opts.default_search not in ('auto', 'auto_warning', None) and ':' not in opts.default_search: parser.error(u'--default-search invalid; did you forget a colon (:) at the end?') # Do not download videos when there are audio-only formats diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index fc1bedd57..9f698323c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -316,13 +316,16 @@ class GenericIE(InfoExtractor): if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'auto' + default_search = 'auto_warning' - if default_search == 'auto': + if default_search in ('auto', 'auto_warning'): if '/' in url: self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) else: + if default_search == 'auto_warning': + self._downloader.report_warning( + 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) else: assert ':' in default_search From d41ac5f5dcff6161a094839e54b3b26c1286f90b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 30 Mar 2014 15:57:47 +0200 Subject: [PATCH 24/41] release 2014.03.30.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6192c4761..ba26f9372 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.03.30' +__version__ = '2014.03.30.1' From 1cbd4106201fe9b3890932c5a099892b0a05db19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 31 Mar 2014 19:31:48 +0700 Subject: [PATCH 25/41] [pyvideo] Modernize --- youtube_dl/extractor/pyvideo.py | 63 ++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 33054591b..d2d909136 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import os @@ -5,45 +7,50 @@ from .common import InfoExtractor class PyvideoIE(InfoExtractor): - _VALID_URL = r'(?:http://)?(?:www\.)?pyvideo\.org/video/(?P\d+)/(.*)' - _TESTS = [{ - u'url': u'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes', - u'file': u'24_4WWkSmNo.mp4', - u'md5': u'de317418c8bc76b1fd8633e4f32acbc6', - u'info_dict': { - u"title": u"Become a logging expert in 30 minutes", - u"description": u"md5:9665350d466c67fb5b1598de379021f7", - u"upload_date": u"20130320", - u"uploader": u"NextDayVideo", - u"uploader_id": u"NextDayVideo", + _VALID_URL = r'http://(?:www\.)?pyvideo\.org/video/(?P\d+)/(.*)' + + _TESTS = [ + { + 'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes', + 'md5': 'de317418c8bc76b1fd8633e4f32acbc6', + 'info_dict': { + 'id': '24_4WWkSmNo', + 'ext': 'mp4', + 'title': 'Become a logging expert in 30 minutes', + 'description': 'md5:9665350d466c67fb5b1598de379021f7', + 'upload_date': '20130320', + 'uploader': 'NextDayVideo', + 'uploader_id': 'NextDayVideo', + }, + 'add_ie': ['Youtube'], }, - u'add_ie': ['Youtube'], - }, - { - u'url': u'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v', - u'md5': u'5fe1c7e0a8aa5570330784c847ff6d12', - u'info_dict': { - u'id': u'2542', - u'ext': u'm4v', - u'title': u'Gloriajw-SpotifyWithErikBernhardsson182', + { + 'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v', + 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', + 'info_dict': { + 'id': '2542', + 'ext': 'm4v', + 'title': 'Gloriajw-SpotifyWithErikBernhardsson182', + }, }, - }, ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) - m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage) + webpage = self._download_webpage(url, video_id) + + m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', webpage) if m_youtube is not None: return self.url_result(m_youtube.group(1), 'Youtube') - title = self._html_search_regex(r'
.*?

([^>]+?)

', - webpage, u'title', flags=re.DOTALL) - video_url = self._search_regex([r'Download.*?
.*?

([^>]+?)

', webpage, 'title', flags=re.DOTALL) + video_url = self._search_regex( + [r'Download.*?
Date: Tue, 1 Apr 2014 00:02:29 +0200 Subject: [PATCH 26/41] [comedycentral] Add support for /videos URLs (Fixes #2660) --- test/test_all_urls.py | 7 ++++++- youtube_dl/extractor/comedycentral.py | 7 +++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 5b6d18a82..dffe3f958 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -144,7 +144,12 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['PBS']) def test_ComedyCentralShows(self): - self.assertMatch('http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', ['ComedyCentralShows']) + self.assertMatch( + 'http://thedailyshow.cc.com/extended-interviews/xm3fnq/andrew-napolitano-extended-interview', + ['ComedyCentralShows']) + self.assertMatch( + 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', + ['ComedyCentralShows']) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 60c0a4f5d..cbc212065 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -43,7 +43,8 @@ class ComedyCentralShowsIE(InfoExtractor): (?Pthedailyshow|thecolbertreport)\.(?:cc\.)?com/ (full-episodes/(?P.*)| (?P - (the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) + (?:videos/[^/]+/(?P[^/?#]+)) + |(the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) |(watch/(?P[^/]*)/(?P.*)))| (?P extended-interviews/(?P[0-9a-z]+)/(?:playlist_tds_extended_)?(?P.*?)(/.*?)?))) @@ -102,7 +103,9 @@ class ComedyCentralShowsIE(InfoExtractor): assert mobj is not None if mobj.group('clip'): - if mobj.group('showname') == 'thedailyshow': + if mobj.group('videotitle'): + epTitle = mobj.group('videotitle') + elif mobj.group('showname') == 'thedailyshow': epTitle = mobj.group('tdstitle') else: epTitle = mobj.group('cntitle') From 28d9032c88e746860b75a035b053501d3d105f8c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Apr 2014 00:02:39 +0200 Subject: [PATCH 27/41] release 2014.04.01 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ba26f9372..dca584937 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.03.30.1' +__version__ = '2014.04.01' From 651486621dd79025a1b0ea08abe786b50a9604ad Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Apr 2014 00:25:11 +0200 Subject: [PATCH 28/41] [comedycentral] Allow URLs with query parts (fixes #2661) --- test/test_all_urls.py | 3 +++ youtube_dl/extractor/comedycentral.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index dffe3f958..ed041ffda 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -150,6 +150,9 @@ class TestAllURLsMatching(unittest.TestCase): self.assertMatch( 'http://thecolbertreport.cc.com/videos/29w6fx/-realhumanpraise-for-fox-news', ['ComedyCentralShows']) + self.assertMatch( + 'http://thecolbertreport.cc.com/videos/gh6urb/neil-degrasse-tyson-pt--1?xrs=eml_col_031114', + ['ComedyCentralShows']) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index cbc212065..ed0c6ea2c 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -45,10 +45,11 @@ class ComedyCentralShowsIE(InfoExtractor): (?P (?:videos/[^/]+/(?P[^/?#]+)) |(the-colbert-report-(videos|collections)/(?P[0-9]+)/[^/]*/(?P.*?)) - |(watch/(?P[^/]*)/(?P.*)))| + |(watch/(?P[^/]*)/(?P.*)) + )| (?P extended-interviews/(?P[0-9a-z]+)/(?:playlist_tds_extended_)?(?P.*?)(/.*?)?))) - $''' + (?:[?#].*|$)''' _TEST = { 'url': 'http://thedailyshow.cc.com/watch/thu-december-13-2012/kristen-stewart', 'md5': '4e2f5cb088a83cd8cdb7756132f9739d', From 017e4dd58ce4ebc2dbd3deb724d416e0f3f9e0ec Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Apr 2014 00:25:17 +0200 Subject: [PATCH 29/41] release 2014.04.01.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index dca584937..e2616f19c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.04.01' +__version__ = '2014.04.01.1' From 5912c639df1b3fe6c14b488d77cb619fa808de75 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Apr 2014 05:56:56 +0200 Subject: [PATCH 30/41] [youtube] Transform google's JSON dialect (fixes #2663) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6384095f9..856a9a596 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1446,7 +1446,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): break more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num) + 'https://youtube.com/%s' % mobj.group('more'), playlist_id, + 'Downloading page #%s' % page_num, + transform_source=uppercase_escape) content_html = more['content_html'] more_widget_html = more['load_more_widget_html'] From c8fc3fb524fff44fd1e162abc08f7f1678aa4233 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Apr 2014 05:57:15 +0200 Subject: [PATCH 31/41] release 2014.04.01.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e2616f19c..a253cff92 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.04.01.1' +__version__ = '2014.04.01.2' From a612753db9c22556967bf60c2eee8a8e63cd98ba Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Apr 2014 13:17:07 +0200 Subject: [PATCH 32/41] [utils] Correct decoding of large unicode codepoints in uppercase_escape (Fixes #2664) --- youtube_dl/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index de9881372..e54ea9d61 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1264,8 +1264,8 @@ class PagedList(object): def uppercase_escape(s): return re.sub( - r'\\U([0-9a-fA-F]{8})', - lambda m: compat_chr(int(m.group(1), base=16)), s) + r'\\U[0-9a-fA-F]{8}', + lambda m: m.group(0).decode('unicode-escape'), s) try: struct.pack(u'!I', 0) From 5853a7316ead52fe21134b24dbcde39c4e6aa9f3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Apr 2014 13:17:15 +0200 Subject: [PATCH 33/41] release 2014.04.01.3 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a253cff92..742ca2a0e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.04.01.2' +__version__ = '2014.04.01.3' From f659951e22430f3f6000a69affef36e78d583ed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Apr 2014 19:37:57 +0700 Subject: [PATCH 34/41] [vk] Support optional dash for oid in embedded links --- youtube_dl/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 3b3bec92f..8b1432fec 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -16,7 +16,7 @@ from ..utils import ( class VKIE(InfoExtractor): IE_NAME = 'vk.com' - _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P\d+).*?\bid=(?P\d+)|(?:videos.*?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' + _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:videos.*?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' _NETRC_MACHINE = 'vk' _TESTS = [ From 0479c625a44e414674982383f5fe051893d125b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Apr 2014 20:17:35 +0700 Subject: [PATCH 35/41] [brightcove] Encode object_str with utf-8 --- youtube_dl/extractor/brightcove.py | 2 +- youtube_dl/extractor/generic.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 83eec84d3..339d60ff0 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -87,7 +87,7 @@ class BrightcoveIE(InfoExtractor): object_str = object_str.replace('<--', '