From 412f356e04b0daaa1a862f8fdc155ae63376e7d2 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 14:47:23 +0200 Subject: [PATCH 01/89] [gameone] Add new extractor gameone Currently only usable for downloading tv episodes residing under http://www.gameone.de/tv/ --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gameone.py | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 youtube_dl/extractor/gameone.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..a294f66ae 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -103,6 +103,7 @@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE +from .gameone import GameOneIE from .gamespot import GameSpotIE from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py new file mode 100644 index 000000000..a8a290477 --- /dev/null +++ b/youtube_dl/extractor/gameone.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import xpath_with_ns + +NAMESPACE_MAP = { + 'media': 'http://search.yahoo.com/mrss/', +} + +RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' + +class GameOneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' + _TESTS = { + 'url': 'http://www.gameone.de/tv/288', + 'md5': '136656b7fb4c9cb4a8e2d500651c499b', + 'info_dict': { + 'id': '288', + 'ext': 'mp4', + 'title': 'Game One - Folge 288', + 'duration': 1238, + 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + 
og_video = self._og_search_video_url(webpage, secure=False) + mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') + + mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') + title = mrss.find('.//item/title').text + thumbnail = mrss.find('.//item/image').get('url') + content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) + content_url = content.get('url') + + content = self._download_xml(content_url, video_id, 'Downloading media:content') + rendition_items = content.findall('.//rendition') + duration = int(rendition_items[0].get('duration')) + formats = [ + { + 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), + 'width': int(r.get('width')), + 'height': int(r.get('height')), + 'tbr': int(r.get('bitrate')), + } + for r in rendition_items + ] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } From 10d5c7aa5fcc4a05b039cc147b3e36732a56b0d2 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 15:10:19 +0200 Subject: [PATCH 02/89] [gameone] Added explanation for usage of http://cdn.riptide-mtvn.com/ --- youtube_dl/extractor/gameone.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index a8a290477..d5fb19cec 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -10,6 +10,8 @@ NAMESPACE_MAP = { 'media': 'http://search.yahoo.com/mrss/', } +# URL prefix to download the mp4 files directly instead of streaming via rtmp +# Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' class GameOneIE(InfoExtractor): From 9e30092361c3b94d66bf2aaf99087d0df201718c Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 17:07:40 +0200 Subject: [PATCH 03/89] [gameone] Added extraction of description and fixed failing tests --- 
youtube_dl/extractor/gameone.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index d5fb19cec..855df74fb 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import xml.etree.ElementTree as ET from .common import InfoExtractor from ..utils import xpath_with_ns @@ -16,7 +17,7 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' - _TESTS = { + _TEST = { 'url': 'http://www.gameone.de/tv/288', 'md5': '136656b7fb4c9cb4a8e2d500651c499b', 'info_dict': { @@ -25,6 +26,11 @@ class GameOneIE(InfoExtractor): 'title': 'Game One - Folge 288', 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + 'description': 'Puh, das ist ja wieder eine volle Packung! Erst begleiten wir Nils zum ' + 'FIFA-Pressepokal 2014, den er nach 2010 nun zum zweiten Mal gewinnen will.\n' + 'Danach gibt’s eine Vorschau auf die drei kommenden Hits “Star Citizen”, “Kingdom Come: Deliverance” und “Project Cars”.\n' + 'Und dann geht’s auch schon weiter mit der nächsten Folge vom Nerdquiz! Der schöne Trant foltert seine Kandidaten wieder ' + 'mit fiesen Fragen. Hier gibt’s die erste Hälfte, in Folge 289 geht’s weiter.' 
} } @@ -39,6 +45,7 @@ class GameOneIE(InfoExtractor): mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') + description = self._extract_description(mrss) content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -61,4 +68,9 @@ class GameOneIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'description': description, } + + def _extract_description(self, mrss): + description = mrss.find('.//item/description') + return u''.join(t for t in description.itertext()) From a84d20fc14eb70310af85da385c879c365fd7897 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 18:20:29 +0200 Subject: [PATCH 04/89] [gameone] Simplified extraction of description --- youtube_dl/extractor/gameone.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 855df74fb..aa0234346 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -26,11 +26,7 @@ class GameOneIE(InfoExtractor): 'title': 'Game One - Folge 288', 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', - 'description': 'Puh, das ist ja wieder eine volle Packung! Erst begleiten wir Nils zum ' - 'FIFA-Pressepokal 2014, den er nach 2010 nun zum zweiten Mal gewinnen will.\n' - 'Danach gibt’s eine Vorschau auf die drei kommenden Hits “Star Citizen”, “Kingdom Come: Deliverance” und “Project Cars”.\n' - 'Und dann geht’s auch schon weiter mit der nächsten Folge vom Nerdquiz! Der schöne Trant foltert seine Kandidaten wieder ' - 'mit fiesen Fragen. Hier gibt’s die erste Hälfte, in Folge 289 geht’s weiter.' 
+ 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', } } @@ -40,12 +36,12 @@ class GameOneIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) + description = self._html_search_meta('description', webpage) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') - description = self._extract_description(mrss) content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -70,7 +66,3 @@ class GameOneIE(InfoExtractor): 'formats': formats, 'description': description, } - - def _extract_description(self, mrss): - description = mrss.find('.//item/description') - return u''.join(t for t in description.itertext()) From a231ce87b56d85354f66d4a9b26763bc73ca86c1 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 18:35:11 +0200 Subject: [PATCH 05/89] [gameone] Added extraction of age_limit --- youtube_dl/extractor/gameone.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index aa0234346..3b3870878 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -27,6 +27,7 @@ class GameOneIE(InfoExtractor): 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', + 'age_limit': 16 } } @@ -37,6 +38,7 @@ class GameOneIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) description = self._html_search_meta('description', webpage) + age_limit 
= int(self._search_regex(r'age=(\d+)', self._html_search_meta('age-de-meta-label', webpage), 'age_limit', '0')) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') @@ -65,4 +67,5 @@ class GameOneIE(InfoExtractor): 'duration': duration, 'formats': formats, 'description': description, + 'age_limit': age_limit, } From 305d0683628d26c8e9ba04c77c4b3c7283106f80 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 19:04:02 +0200 Subject: [PATCH 06/89] [gameone] Added timestamp extraction --- youtube_dl/extractor/gameone.py | 14 +++++++++++--- youtube_dl/utils.py | 6 +++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 3b3870878..008eb90a5 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -2,10 +2,12 @@ from __future__ import unicode_literals import re -import xml.etree.ElementTree as ET from .common import InfoExtractor -from ..utils import xpath_with_ns +from ..utils import ( + xpath_with_ns, + parse_iso8601 +) NAMESPACE_MAP = { 'media': 'http://search.yahoo.com/mrss/', @@ -15,6 +17,8 @@ NAMESPACE_MAP = { # Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' +PUB_DATE_FORMAT = '%Y-%m-%d %H:%M:%S %z' + class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' _TEST = { @@ -27,7 +31,9 @@ class GameOneIE(InfoExtractor): 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', - 'age_limit': 16 + 'age_limit': 16, + 'upload_date': '20140513', + 'timestamp': 1399980122, } } @@ -44,6 +50,7 @@ class GameOneIE(InfoExtractor): mrss = 
self._download_xml(mrss_url, video_id, 'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') + timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ') content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -68,4 +75,5 @@ class GameOneIE(InfoExtractor): 'formats': formats, 'description': description, 'age_limit': age_limit, + 'timestamp': timestamp, } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1036ea9bd..3e7947f5d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -765,7 +765,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_response = http_response -def parse_iso8601(date_str): +def parse_iso8601(date_str, delimiter='T'): """ Return a UNIX timestamp from the given date """ if date_str is None: @@ -785,8 +785,8 @@ def parse_iso8601(date_str): timezone = datetime.timedelta( hours=sign * int(m.group('hours')), minutes=sign * int(m.group('minutes'))) - - dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone + date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) + dt = datetime.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt.timetuple()) From 1d0668ed5a39b089b30b8e1e273c6b8a4f954eb2 Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Mon, 19 May 2014 23:28:21 +1000 Subject: [PATCH 07/89] [tenplay] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tenplay.py | 72 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/tenplay.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..a2c12fc8e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -265,6 +265,7 @@ from .sztvhu import SztvHuIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from 
.tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE from .theplatform import ThePlatformIE diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py new file mode 100644 index 000000000..449351551 --- /dev/null +++ b/youtube_dl/extractor/tenplay.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class TenPlayIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+' + _TEST = { + 'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way', + 'md5': 'c9dda6aac8f814352ad2aee8899b1612', + 'info_dict': { + 'id': '2695695426001', + 'ext': 'flv', + 'title': 'TENplay: TV your way', + 'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.', + 'timestamp': 1380150606.889, + 'upload_date': '20130925', + 'uploader': 'TENplay' + } + } + + _video_fields = ["id","name","shortDescription","longDescription","creationDate","publishedDate","lastModifiedDate","customFields","videoStillURL","thumbnailURL","referenceId","length","playsTotal","playsTrailingWeek","renditions","captioning","startDate","endDate"] + + def _real_extract(self, url): + webpage = self._download_webpage(url, url) + video_id = self._html_search_regex(r'videoID: "(\d+?)"', webpage, 'video_id') + api_token = self._html_search_regex(r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') + title = self._html_search_regex(r'', webpage, 'title') + + json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title) + + formats = [] + for rendition in json['renditions']: + url = rendition['remoteUrl'] or rendition['url'] + protocol = 'rtmp' if url.startswith('rtmp') else 'http' + ext = 'flv' if protocol == 'rtmp' else rendition['videoContainer'].lower() + + if protocol == 'rtmp': + url = 
url.replace('&mp4:', '') + + formats.append({ + 'format_id': '_'.join(['rtmp', rendition['videoContainer'].lower(), rendition['videoCodec'].lower()]), + 'width': rendition['frameWidth'], + 'height': rendition['frameHeight'], + 'tbr': rendition['encodingRate'] / 1024, + 'filesize': rendition['size'], + 'protocol': protocol, + 'ext': ext, + 'vcodec': rendition['videoCodec'].lower(), + 'container': rendition['videoContainer'].lower(), + 'url': url + }) + + return { + 'id': video_id, + 'display_id': json['referenceId'], + 'title': json['name'], + 'description': json['shortDescription'] or json['longDescription'], + 'formats': formats, + 'thumbnails': [{ + 'url': json['videoStillURL'] + }, { + 'url': json['thumbnailURL'] + }], + 'thumbnail': json['videoStillURL'], + 'duration': json['length'] / 1000, + 'timestamp': float(json['creationDate']) / 1000, + 'uploader': json['customFields']['production_company_distributor'] if 'production_company_distributor' in json['customFields'] else 'TENplay', + 'view_count': json['playsTotal'] + } From e5c3a4b54995422dcef1d2fbb032446e35358e8d Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Mon, 19 May 2014 22:33:51 +0200 Subject: [PATCH 08/89] [gameone] Fix indentation and removed unused constants --- youtube_dl/extractor/gameone.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 008eb90a5..2544ea521 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -14,10 +14,10 @@ NAMESPACE_MAP = { } # URL prefix to download the mp4 files directly instead of streaming via rtmp -# Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 +# Credits go to XBox-Maniac +# http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' -PUB_DATE_FORMAT = '%Y-%m-%d %H:%M:%S %z' class GameOneIE(InfoExtractor): _VALID_URL = 
r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' @@ -44,7 +44,14 @@ class GameOneIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) description = self._html_search_meta('description', webpage) - age_limit = int(self._search_regex(r'age=(\d+)', self._html_search_meta('age-de-meta-label', webpage), 'age_limit', '0')) + age_limit = int( + self._search_regex( + r'age=(\d+)', + self._html_search_meta( + 'age-de-meta-label', + webpage), + 'age_limit', + '0')) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') @@ -54,16 +61,19 @@ class GameOneIE(InfoExtractor): content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') - content = self._download_xml(content_url, video_id, 'Downloading media:content') + content = self._download_xml( + content_url, + video_id, + 'Downloading media:content') rendition_items = content.findall('.//rendition') duration = int(rendition_items[0].get('duration')) formats = [ - { - 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), - 'width': int(r.get('width')), - 'height': int(r.get('height')), - 'tbr': int(r.get('bitrate')), - } + { + 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), + 'width': int(r.get('width')), + 'height': int(r.get('height')), + 'tbr': int(r.get('bitrate')), + } for r in rendition_items ] From 0d933b2ad57563b70d725ce03fe0e79c4d84c99e Mon Sep 17 00:00:00 2001 From: Ariset Llerena Date: Thu, 12 Jun 2014 03:27:23 -0400 Subject: [PATCH 09/89] Added vimple.ru support --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vimple.py | 73 ++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 youtube_dl/extractor/vimple.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 45cc479e2..4b7900b4f 100644 --- 
a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -324,6 +324,7 @@ from .vimeo import ( VimeoReviewIE, VimeoWatchLaterIE, ) +from .vimple import VimpleIE from .vine import ( VineIE, VineUserIE, diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py new file mode 100644 index 000000000..0f69e7126 --- /dev/null +++ b/youtube_dl/extractor/vimple.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re, zlib, base64 +import xml.etree.ElementTree + +from .common import InfoExtractor + +class VimpleIE(InfoExtractor): + IE_DESC = 'Vimple.ru' + _VALID_URL = r'https?://player.vimple.ru/iframe/(?P[a-f0-9]+)' + _TESTS = [ + { + # Quality: Large, from iframe + 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'info_dict': { + 'id': 'b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'great-escape-minecraft.flv', + 'ext':'mp4', + 'duration': 352, + 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + }, + } + ] + + #http://jsunpack-n.googlecode.com/svn-history/r63/trunk/swf.py + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id + + iframe = self._download_webpage(iframe_url, video_id, note='Downloading iframe', errnote='unable to fetch iframe') + player_url = self._html_search_regex(r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') + + player = self._request_webpage(player_url, video_id, note='Downloading swf player').read() + + #http://stackoverflow.com/a/6804758 + #http://stackoverflow.com/a/12073686 + player = zlib.decompress(player[8:]) + + + xml_pieces = re.findall(b'([a-zA-Z0-9 =\\+/]{500})', player) + xml_pieces = [piece[1:-1] for piece in xml_pieces] + + xml_data = b''.join(xml_pieces) + xml_data = base64.b64decode(xml_data) + + xml_data = xml.etree.ElementTree.fromstring(xml_data) + + video = xml_data.find('Video') + quality 
= video.get('quality') + q_tag = video.find(quality.capitalize()) + + formats = [ + { + 'url': q_tag.get('url'), + 'tbr': int(q_tag.get('bitrate')), + 'filesize': int(q_tag.get('filesize')), + 'format_id': quality, + }, + ] + + return { + 'id': video_id, + 'title': video.find('Title').text, + 'formats': formats, + 'thumbnail': video.find('Poster').get('url'), + 'duration': int(video.get('duration')), + 'webpage_url': video.find('Share').get('videoPageUrl'), + } + + From cb437dc2ad41122f1a08595a0829b4d929ddd580 Mon Sep 17 00:00:00 2001 From: Ariset Llerena Date: Thu, 12 Jun 2014 22:33:50 -0400 Subject: [PATCH 10/89] removed extra char in regexp --- youtube_dl/extractor/vimple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 0f69e7126..a2f93afe3 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -40,7 +40,7 @@ class VimpleIE(InfoExtractor): player = zlib.decompress(player[8:]) - xml_pieces = re.findall(b'([a-zA-Z0-9 =\\+/]{500})', player) + xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) xml_pieces = [piece[1:-1] for piece in xml_pieces] xml_data = b''.join(xml_pieces) From e66ab17a3683bee57482ccce8f6b0a632f03d78e Mon Sep 17 00:00:00 2001 From: Ariset Llerena Date: Thu, 12 Jun 2014 23:08:06 -0400 Subject: [PATCH 11/89] Verified with pep8 and pyflakes --- youtube_dl/extractor/vimple.py | 49 +++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index a2f93afe3..f3a807cd3 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -1,53 +1,66 @@ # coding: utf-8 from __future__ import unicode_literals -import re, zlib, base64 -import xml.etree.ElementTree +import re +import zlib +import base64 +import xml.etree.ElementTree from .common import InfoExtractor + class VimpleIE(InfoExtractor): IE_DESC = 'Vimple.ru' 
- _VALID_URL = r'https?://player.vimple.ru/iframe/(?P[a-f0-9]+)' + _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P[a-f0-9]{10,})' _TESTS = [ + # Quality: Large, from iframe { - # Quality: Large, from iframe 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', 'info_dict': { 'id': 'b132bdfd71b546d3972f9ab9a25f201c', 'title': 'great-escape-minecraft.flv', - 'ext':'mp4', + 'ext': 'mp4', 'duration': 352, 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', - }, - } + }, + }, + # Quality: Medium, from mainpage + { + 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', + 'info_dict': { + 'id': 'a15950562888453b8e6f9572dc8600cd', + 'title': 'DB 01', + 'ext': 'flv', + 'duration': 1484, + 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', + } + }, ] - - #http://jsunpack-n.googlecode.com/svn-history/r63/trunk/swf.py + + # http://jsunpack-n.googlecode.com/svn-history/r63/trunk/swf.py def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - + iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id iframe = self._download_webpage(iframe_url, video_id, note='Downloading iframe', errnote='unable to fetch iframe') player_url = self._html_search_regex(r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') - + player = self._request_webpage(player_url, video_id, note='Downloading swf player').read() - #http://stackoverflow.com/a/6804758 - #http://stackoverflow.com/a/12073686 + # http://stackoverflow.com/a/6804758 + # http://stackoverflow.com/a/12073686 player = zlib.decompress(player[8:]) - xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) xml_pieces = [piece[1:-1] for piece in xml_pieces] - + xml_data = b''.join(xml_pieces) xml_data = base64.b64decode(xml_data) - + xml_data = xml.etree.ElementTree.fromstring(xml_data) - + video = xml_data.find('Video') quality = video.get('quality') q_tag = video.find(quality.capitalize()) @@ -69,5 +82,3 @@ 
class VimpleIE(InfoExtractor): 'duration': int(video.get('duration')), 'webpage_url': video.find('Share').get('videoPageUrl'), } - - From 31a196d7f55d7d7676c08553474a5ec122178177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sun, 29 Jun 2014 13:45:10 +0200 Subject: [PATCH 12/89] [TeacherTube] add user + collection, removed classrooms --- test/test_playlists.py | 10 +++++----- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/teachertube.py | 28 +++++++++++++++++++--------- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 71dac1b02..994b1d4b0 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,7 +28,7 @@ from youtube_dl.extractor import ( SoundcloudSetIE, SoundcloudUserIE, SoundcloudPlaylistIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, LivestreamIE, LivestreamOriginalIE, NHLVideocenterIE, @@ -379,13 +379,13 @@ class TestPlaylists(unittest.TestCase): result['title'], 'Brace Yourself - Today\'s Weirdest News') self.assertTrue(len(result['entries']) >= 10) - def test_TeacherTubeClassroom(self): + def test_TeacherTubeUser(self): dl = FakeYDL() - ie = TeacherTubeClassroomIE(dl) - result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2') + ie = TeacherTubeUserIE(dl) + result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'rbhagwati2') - self.assertTrue(len(result['entries']) >= 20) + self.assertTrue(len(result['entries']) >= 179) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f910d1a26..24b046173 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -282,7 +282,7 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .teachertube import ( TeacherTubeIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, ) 
from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index d9868d569..73b4a3634 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -86,22 +86,32 @@ class TeacherTubeIE(InfoExtractor): } -class TeacherTubeClassroomIE(InfoExtractor): - IE_NAME = 'teachertube:classroom' - IE_DESC = 'teachertube.com online classrooms' +class TeacherTubeUserIE(InfoExtractor): + IE_NAME = 'teachertube:user:collection' + IE_DESC = 'teachertube.com user and collection videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P[0-9a-zA-Z]+)/?' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('user') - rss = self._download_xml( - 'http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, - user_id, 'Downloading classroom RSS') + urls = [] + webpage = self._download_webpage(url, user_id) + urls.extend(re.findall( + r'"sidebar_thumb_time">[0-9:]+\s+', + webpage)) + + pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] + for p in pages: + more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) + webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) + urls.extend(re.findall( + r'"sidebar_thumb_time">[0-9:]+\s+', + webpage)) entries = [] - for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'): - entries.append(self.url_result(url.attrib['url'], 'TeacherTube')) + for url in urls: + entries.append(self.url_result(url, 'TeacherTube')) return self.playlist_result(entries, user_id) From 57bdc730e264ffbc93be55fe1541ca40fce48c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 19:33:39 +0700 Subject: [PATCH 
13/89] [vk] Add support for more URL formats (#3172) --- youtube_dl/extractor/vk.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 66fe1dd3e..c48528ad9 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -16,7 +16,7 @@ from ..utils import ( class VKIE(InfoExtractor): IE_NAME = 'vk.com' - _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:videos.*?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' + _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:.+?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' _NETRC_MACHINE = 'vk' _TESTS = [ @@ -62,11 +62,35 @@ class VKIE(InfoExtractor): 'id': '164049491', 'ext': 'mp4', 'uploader': 'Триллеры', - 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0', + 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 'duration': 8352, }, 'skip': 'Requires vk account credentials', }, + { + 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', + 'md5': 'd82c22e449f036282d1d3f7f4d276869', + 'info_dict': { + 'id': '166094326', + 'ext': 'mp4', + 'uploader': 'Киномания - лучшее из мира кино', + 'title': 'Запах женщины (1992)', + 'duration': 9392, + }, + 'skip': 'Requires vk account credentials', + }, + { + 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', + 'md5': '4d7a5ef8cf114dfa09577e57b2993202', + 'info_dict': { + 'id': '168067957', + 'ext': 'mp4', + 'uploader': 'Киномания - лучшее из мира кино', + 'title': ' ', + 'duration': 7291, + }, + 'skip': 'Requires vk account credentials', + }, ] def _login(self): From a8a98e43f214e6fe5d322dca3534a8ec926890b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 19:51:00 +0700 Subject: [PATCH 14/89] [vk] Add support for mobile URLs --- youtube_dl/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index c48528ad9..4afb05923 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -16,7 +16,7 @@ from ..utils import ( class VKIE(InfoExtractor): IE_NAME = 'vk.com' - _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:.+?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' + _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:.+?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' _NETRC_MACHINE = 'vk' _TESTS = [ From 36fbc6887f22603444ec70ca2d690be0f3b4f5f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 20:06:47 +0700 Subject: [PATCH 15/89] [ivi] Add support for embedded URLs --- youtube_dl/extractor/ivi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 528be1524..4027deb70 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -14,7 +14,7 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P[^/]+))?/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' _TESTS = [ # Single movie From 849086a1ae153e0dbc5047cbcf8324938d7b7036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 20:07:59 +0700 Subject: [PATCH 16/89] [vk] Better support for embeds --- youtube_dl/extractor/vk.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 4afb05923..6c7db7a6f 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -91,6 +91,17 @@ class VKIE(InfoExtractor): }, 'skip': 'Requires vk account credentials', }, + { + 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540', + 'md5': 
'0c45586baa71b7cb1d0784ee3f4e00a6', + 'note': 'ivi.ru embed', + 'info_dict': { + 'id': '60690', + 'ext': 'mp4', + 'title': 'Книга Илая', + 'duration': 6771, + }, + }, ] def _login(self): @@ -134,6 +145,16 @@ class VKIE(InfoExtractor): if m_yt is not None: self.to_screen('Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') + + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) + if m_opts: + m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) + if m_opts_url: + opts_url = m_opts_url.group(1) + if opts_url.startswith('//'): + opts_url = 'http:' + opts_url + return self.url_result(opts_url) + data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') data = json.loads(data_json) From 0364fa8b65a6c6742454ec5f3a858e06dc1527f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 20:18:23 +0700 Subject: [PATCH 17/89] [generic] Add support for ivi.ru embedded player --- youtube_dl/extractor/generic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9dd03aba4..869efb215 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -620,6 +620,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'VK') + # Look for embedded ivi player + mobj = re.search(r']+?src=(["\'])(?Phttps?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Ivi') + # Look for embedded Huffington Post player mobj = re.search( r']+?src=(["\'])(?Phttps?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) From 41b610acab43c03f71fb64ae55c0912352143ae7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sun, 29 Jun 2014 16:43:31 +0200 Subject: [PATCH 18/89] [GooglePlus] fix video title extraction --- youtube_dl/extractor/googleplus.py | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index cc29a7e5d..07d994b44 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor): # Extract title # Get the first line for title - video_title = self._html_search_regex(r' Date: Sun, 29 Jun 2014 20:33:46 +0200 Subject: [PATCH 19/89] [teachertube:user] fix regex --- youtube_dl/extractor/teachertube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 73b4a3634..1a438e1e4 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -99,7 +99,7 @@ class TeacherTubeUserIE(InfoExtractor): urls = [] webpage = self._download_webpage(url, user_id) urls.extend(re.findall( - r'"sidebar_thumb_time">[0-9:]+\s+', + r'"sidebar_thumb_time">[0-9:]+\s+', webpage)) pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] @@ -107,7 +107,7 @@ class TeacherTubeUserIE(InfoExtractor): more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) urls.extend(re.findall( - r'"sidebar_thumb_time">[0-9:]+\s+', + r'"sidebar_thumb_time">[0-9:]+\s+', webpage)) entries = [] From d518d06efd143072a64bcd5dc51e16c89bda06c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 30 Jun 2014 03:16:31 +0700 Subject: [PATCH 20/89] [vk] Skip georestricted ivi embed test --- youtube_dl/extractor/vk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 6c7db7a6f..918bd1098 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -101,6 +101,7 @@ class VKIE(InfoExtractor): 'title': 'Книга Илая', 'duration': 6771, }, + 'skip': 'Only works from Russia', 
}, ] From 7807ee664dab04673b18722c4313abe09ee6b1be Mon Sep 17 00:00:00 2001 From: pulpe Date: Tue, 1 Jul 2014 09:59:57 +0200 Subject: [PATCH 21/89] [wdr] fix test --- youtube_dl/extractor/wdr.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index feeb44b45..f741ba540 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals import re @@ -54,14 +55,14 @@ class WDRIE(InfoExtractor): }, }, { - 'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', - 'md5': 'cfff440d4ee64114083ac44676df5d15', + 'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html', + 'md5': '24e83813e832badb0a8d7d1ef9ef0691', 'info_dict': { - 'id': 'mdb-363068', + 'id': 'mdb-463528', 'ext': 'mp3', - 'title': 'Grenzenlos lecker - Baklava', + 'title': 'Süpersong: Soul Bossa Nova', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', - 'upload_date': '20140311', + 'upload_date': '20140630', }, }, ] @@ -127,9 +128,10 @@ class WDRMobileIE(InfoExtractor): 'info_dict': { 'title': '4283021', 'id': '421735', + 'ext': 'mp4', 'age_limit': 0, }, - '_skip': 'Will be depublicized shortly' + 'skip': 'Problems with loading data.' 
} def _real_extract(self, url): @@ -139,6 +141,7 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, + 'ext': determine_ext(url), 'user_agent': 'mobile', } From 29f6ed78e87946979ab6472b118a4da7cf7ef0c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Tue, 1 Jul 2014 10:35:49 +0200 Subject: [PATCH 22/89] [tagesschau] replace 404 test --- youtube_dl/extractor/tagesschau.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 36331529e..25b9864ad 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -20,13 +20,13 @@ class TagesschauIE(InfoExtractor): 'thumbnail': 're:^http:.*\.jpg$', }, }, { - 'url': 'http://www.tagesschau.de/multimedia/video/video-196.html', - 'md5': '8aaa8bf3ae1ca2652309718c03019128', + 'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html', + 'md5': '66652566900963a3f962333579eeffcf', 'info_dict': { - 'id': '196', + 'id': '5964', 'ext': 'mp4', - 'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt', - 'description': 'md5:f22e4af75821d174fa6c977349682691', + 'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland', + 'description': 'md5:07bfc78c48eec3145ed4805299a1900a', 'thumbnail': 're:http://.*\.jpg', }, }] From c67f584eb3cb3fe9ccb6ace6b6ed96594ca7799d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Jul 2014 19:24:18 +0700 Subject: [PATCH 23/89] [rai] Skip test --- youtube_dl/extractor/rai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index cb4305349..ba3dd707f 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -35,7 +35,8 @@ class RaiIE(SubtitlesInfoExtractor): 'description': '', 'upload_date': '20140612', 'duration': 
1758, - } + }, + 'skip': 'Error 404', }, { 'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', From c4808c6009aef29c908139ee529f4938b7df8190 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Jul 2014 15:48:18 +0200 Subject: [PATCH 24/89] [youtube_truncated_url] Add support for truncated watch URLs with annotations (#3178) --- youtube_dl/extractor/youtube.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6bdea1c44..ec3024cbd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1825,10 +1825,18 @@ class YoutubeTruncatedURLIE(InfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list _VALID_URL = r'''(?x) - (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$| + (?:https?://)?[^/]+/watch\?(?: + feature=[a-z_]+| + annotation_id=annotation_[^&]+ + )?$| (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ ''' + _TESTS = [{ + 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', + 'only_matching': True, + }] + def _real_extract(self, url): raise ExtractorError( u'Did you forget to quote the URL? 
Remember that & is a meta ' From dc2fc736911b5b8d769becd9227976e5caf267dc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Jul 2014 15:49:34 +0200 Subject: [PATCH 25/89] [youtube:truncated_url] Move test to extractor --- test/test_all_urls.py | 3 --- youtube_dl/extractor/youtube.py | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 4b56137ce..2bc81f020 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase): def test_youtube_show_matching(self): self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) - def test_youtube_truncated(self): - self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) - def test_youtube_search_matching(self): self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ec3024cbd..bf0fbc924 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1835,6 +1835,9 @@ class YoutubeTruncatedURLIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', 'only_matching': True, + }, { + 'url': 'http://www.youtube.com/watch?', + 'only_matching': True, }] def _real_extract(self, url): From 2fd466fcfcc9895230f806379d149389236acde2 Mon Sep 17 00:00:00 2001 From: hakatashi Date: Wed, 2 Jul 2014 02:32:54 +0900 Subject: [PATCH 26/89] [niconico] Download without authentication --- youtube_dl/extractor/niconico.py | 39 +++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 517a72561..ba7464cb8 100644 --- 
a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -39,15 +39,18 @@ class NiconicoIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' _NETRC_MACHINE = 'niconico' + # Determine whether the downloader uses authentication to download video + _AUTHENTICATE = False def _real_initialize(self): - self._login() + if self._downloader.params.get('username', None) is not None: + self._AUTHENTICATE = True + + if self._AUTHENTICATE: + self._login() def _login(self): (username, password) = self._get_login_info() - if username is None: - # Login is required - raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) # Log in login_form_strs = { @@ -79,10 +82,30 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, - video_id, 'Downloading flv info') + if self._AUTHENTICATE: + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, + video_id, 'Downloading flv info') + else: + # Get external player info + ext_player_info = self._download_webpage( + 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) + thumb_play_key = self._search_regex( + r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') + + # Get flv info + flv_info_data = compat_urllib_parse.urlencode({ + 'k': thumb_play_key, + 'v': video_id + }) + flv_info_request = compat_urllib_request.Request( + 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, + {'Content-Type': 'application/x-www-form-urlencoded'}) + flv_info_webpage = self._download_webpage( + flv_info_request, video_id, + note='Downloading flv info', errnote='Unable to download flv info') + video_real_url = 
compat_urlparse.parse_qs(flv_info_webpage)['url'][0] # Start extracting information From 64ed7a38f98e9b01feae757bec25b81db80b29f6 Mon Sep 17 00:00:00 2001 From: hakatashi Date: Wed, 2 Jul 2014 03:13:12 +0900 Subject: [PATCH 27/89] [niconico] Add support for channel video --- youtube_dl/extractor/niconico.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 517a72561..43d8644a4 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -37,7 +37,7 @@ class NiconicoIE(InfoExtractor): }, } - _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' + _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z][a-z])?[0-9]+)(?:.*)$' _NETRC_MACHINE = 'niconico' def _real_initialize(self): @@ -91,20 +91,18 @@ class NiconicoIE(InfoExtractor): video_format = video_extension.upper() video_thumbnail = video_info.find('.//thumbnail_url').text video_description = video_info.find('.//description').text - video_uploader_id = video_info.find('.//user_id').text video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) video_view_count = video_info.find('.//view_counter').text video_webpage_url = video_info.find('.//watch_url').text # uploader - video_uploader = video_uploader_id - url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id - try: - user_info = self._download_xml( - url, video_id, note='Downloading user information') - video_uploader = user_info.find('.//nickname').text - except ExtractorError as err: - self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err)) + # No need to fetch extra resources...new API has field for uploader's name + if video_info.find('.//ch_id') is not None: + video_uploader_id = video_info.find('.//ch_id').text + video_uploader = video_info.find('.//ch_name').text + elif 
video_info.find('.//user_id') is not None: + video_uploader_id = video_info.find('.//user_id').text + video_uploader = video_info.find('.//user_nickname').text return { 'id': video_id, From 93881db22a331ac7ce855c4680998aedf9a68cfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Jul 2014 19:24:01 +0700 Subject: [PATCH 28/89] [anitube] Modernize --- youtube_dl/extractor/anitube.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 2b019daa9..31f0d417c 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -1,22 +1,24 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor class AnitubeIE(InfoExtractor): - IE_NAME = u'anitube.se' + IE_NAME = 'anitube.se' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P\d+)' _TEST = { - u'url': u'http://www.anitube.se/video/36621', - u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', - u'file': u'36621.mp4', - u'info_dict': { - u'id': u'36621', - u'ext': u'mp4', - u'title': u'Recorder to Randoseru 01', + 'url': 'http://www.anitube.se/video/36621', + 'md5': '59d0eeae28ea0bc8c05e7af429998d43', + 'info_dict': { + 'id': '36621', + 'ext': 'mp4', + 'title': 'Recorder to Randoseru 01', + 'duration': 180.19, }, - u'skip': u'Blocked in the US', + 'skip': 'Blocked in the US', } def _real_extract(self, url): @@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', - webpage, u'key') + key = self._html_search_regex( + r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key') - config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, - key) + config_xml = self._download_xml( + 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) 
video_title = config_xml.find('title').text + thumbnail = config_xml.find('image').text + duration = float(config_xml.find('duration').text) formats = [] video_url = config_xml.find('file') @@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor): return { 'id': video_id, 'title': video_title, + 'thumbnail': thumbnail, + 'duration': duration, 'formats': formats } From 7aeb67b39b055e9586e7ab21c108a3176cfe0203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Jul 2014 21:08:44 +0700 Subject: [PATCH 29/89] [teachertube:user:collection] Update media regex --- youtube_dl/extractor/teachertube.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 1a438e1e4..7167a036e 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -92,23 +92,21 @@ class TeacherTubeUserIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P[0-9a-zA-Z]+)/?' + _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+.+?' 
+ def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('user') urls = [] webpage = self._download_webpage(url, user_id) - urls.extend(re.findall( - r'"sidebar_thumb_time">[0-9:]+\s+', - webpage)) + urls.extend(re.findall(self._MEDIA_RE, webpage)) pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] for p in pages: more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) - urls.extend(re.findall( - r'"sidebar_thumb_time">[0-9:]+\s+', - webpage)) + urls.extend(re.findall(self._MEDIA_RE, webpage)) entries = [] for url in urls: From 1e07fea200275a1230b80e405918cdeb29d1afd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Jul 2014 21:11:56 +0700 Subject: [PATCH 30/89] [teachertube] Add support for new video URL format --- youtube_dl/extractor/teachertube.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 7167a036e..2c2113b14 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor): IE_NAME = 'teachertube' IE_DESC = 'teachertube.com videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/|audio/)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P\d+)' _TESTS = [{ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', @@ -45,6 +45,15 @@ class TeacherTubeIE(InfoExtractor): 'title': 'PER ASPERA AD ASTRA', 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', }, + }, { + 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790', + 'md5': '9c79fbb2dd7154823996fc28d4a26998', + 
'info_dict': { + 'id': '297790', + 'ext': 'mp4', + 'title': 'Intro Video - Schleicher', + 'description': 'Intro Video - Why to flip, how flipping will', + }, }] def _real_extract(self, url): From 6feb2d5e803dee49b2e4a8f3a7f33ca7f01f96b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jul 2014 19:21:19 +0700 Subject: [PATCH 31/89] [youtube:search_url] Update regexes --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bf0fbc924..f420b8148 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1698,14 +1698,14 @@ class YoutubeSearchURLIE(InfoExtractor): webpage = self._download_webpage(url, query) result_code = self._search_regex( - r'(?s)
    ', webpage, u'result HTML') + r'(?s)
      ', webpage, u'result HTML') part_codes = re.findall( r'(?s)

      (.*?)

      ', result_code) entries = [] for part_code in part_codes: part_title = self._html_search_regex( - r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) + [r'(?s)title="([^"]+)"', r'>([^<]+)
      '], part_code, 'item title', fatal=False) part_url_snippet = self._html_search_regex( r'(?s)href="([^"]+)"', part_code, 'item URL') part_url = compat_urlparse.urljoin( From 15ce1338b42f906a5b9f812b8f8b5287eab8a20a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jul 2014 22:05:46 +0700 Subject: [PATCH 32/89] [niconico] Extract more metadata and simplify (Closes #3181) --- youtube_dl/extractor/niconico.py | 57 +++++++++++++++++--------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 43d8644a4..31f60041c 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -8,10 +8,10 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, compat_urlparse, - compat_str, - ExtractorError, unified_strdate, + parse_duration, + int_or_none, ) @@ -30,6 +30,7 @@ class NiconicoIE(InfoExtractor): 'uploader_id': '2698420', 'upload_date': '20131123', 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', + 'duration': 33, }, 'params': { 'username': 'ydl.niconico@gmail.com', @@ -37,7 +38,7 @@ class NiconicoIE(InfoExtractor): }, } - _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z][a-z])?[0-9]+)(?:.*)$' + _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' def _real_initialize(self): @@ -86,35 +87,39 @@ class NiconicoIE(InfoExtractor): video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] # Start extracting information - video_title = video_info.find('.//title').text - video_extension = video_info.find('.//movie_type').text - video_format = video_extension.upper() - video_thumbnail = video_info.find('.//thumbnail_url').text - video_description = video_info.find('.//description').text - video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) - video_view_count = 
video_info.find('.//view_counter').text - video_webpage_url = video_info.find('.//watch_url').text + title = video_info.find('.//title').text + extension = video_info.find('.//movie_type').text + video_format = extension.upper() + thumbnail = video_info.find('.//thumbnail_url').text + description = video_info.find('.//description').text + upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) + view_count = int_or_none(video_info.find('.//view_counter').text) + comment_count = int_or_none(video_info.find('.//comment_num').text) + duration = parse_duration(video_info.find('.//length').text) + webpage_url = video_info.find('.//watch_url').text - # uploader - # No need to fetch extra resources...new API has field for uploader's name if video_info.find('.//ch_id') is not None: - video_uploader_id = video_info.find('.//ch_id').text - video_uploader = video_info.find('.//ch_name').text + uploader_id = video_info.find('.//ch_id').text + uploader = video_info.find('.//ch_name').text elif video_info.find('.//user_id') is not None: - video_uploader_id = video_info.find('.//user_id').text - video_uploader = video_info.find('.//user_nickname').text + uploader_id = video_info.find('.//user_id').text + uploader = video_info.find('.//user_nickname').text + else: + uploader_id = uploader = None return { 'id': video_id, 'url': video_real_url, - 'title': video_title, - 'ext': video_extension, + 'title': title, + 'ext': extension, 'format': video_format, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'uploader': video_uploader, - 'upload_date': video_upload_date, - 'uploader_id': video_uploader_id, - 'view_count': video_view_count, - 'webpage_url': video_webpage_url, + 'thumbnail': thumbnail, + 'description': description, + 'uploader': uploader, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'comment_count': comment_count, + 'duration': duration, + 'webpage_url': webpage_url, } From 
b67f1840a181e682ca0e16b74aae7fe39fe2192f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jul 2014 22:26:56 +0700 Subject: [PATCH 33/89] [niconico] Remove unused import --- youtube_dl/extractor/niconico.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index d98131271..c0c139b5d 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -8,7 +8,6 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, compat_urlparse, - ExtractorError, unified_strdate, parse_duration, int_or_none, From ba4133c9eb3ef342e2c1505e576f3fda674fe04c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jul 2014 22:30:43 +0700 Subject: [PATCH 34/89] Credit @hakatashi for #3181 #3182 --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1e01432d2..37c40cb79 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -59,6 +59,7 @@ __authors__ = ( 'Adam Thalhammer', 'Georg Jähnig', 'Ralf Haring', + 'Koki Takahashi', ) __license__ = 'Public Domain' From 49cbe7c8e3b35f9492ee1dd816ed011aa3980d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sat, 5 Jul 2014 14:42:26 +0200 Subject: [PATCH 35/89] [allocine] add extractor for allocine.fr (fixes #3189) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/allocine.py | 89 ++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 youtube_dl/extractor/allocine.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 24b046173..12cca5c2e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -3,6 +3,7 @@ from .addanime import AddAnimeIE from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .aol import AolIE +from .allocine import AllocineIE from .aparat import 
AparatIE from .appletrailers import AppleTrailersIE from .archiveorg import ArchiveOrgIE diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py new file mode 100644 index 000000000..34f0cd49b --- /dev/null +++ b/youtube_dl/extractor/allocine.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, + qualities, + determine_ext, +) + + +class AllocineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P[0-9]+)(?:\.html)?' + + _TESTS = [{ + 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', + 'md5': '0c9fcf59a841f65635fa300ac43d8269', + 'info_dict': { + 'id': '19546517', + 'ext': 'mp4', + 'title': 'Astérix - Le Domaine des Dieux Teaser VF', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', + 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', + 'info_dict': { + 'id': '19540403', + 'ext': 'mp4', + 'title': 'Planes 2 Bande-annonce VF', + 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'md5': '101250fb127ef9ca3d73186ff22a47ce', + 'info_dict': { + 'id': '19544709', + 'ext': 'mp4', + 'title': 'Dragons 2 - Bande annonce finale VF', + 'description': 'md5:e74a4dc750894bac300ece46c7036490', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + typ = mobj.group('typ') + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + if typ == 'film': + video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video 
id') + else: + player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player') + + player_data = json.loads(player) + video_id = compat_str(player_data['refMedia']) + + xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) + + video = xml.find('.//AcVisionVideo').attrib + quality = qualities(['ld', 'md', 'hd']) + + formats = [] + for k, v in video.items(): + if re.match(r'.+_path', k): + format_id = k.split('_')[0] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': v, + 'ext': determine_ext(v), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video['videoTitle'], + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + } From 7571c02c8ad38919654d3cdd21ec567f57fe2451 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Jul 2014 11:22:44 +0200 Subject: [PATCH 36/89] [generic] Set default-search to error This prevents users from submitting bug reports where they mistyped a URL, and prevents me from getting a weird video when holding shift and thus searching for :Tds --- youtube_dl/__init__.py | 2 +- youtube_dl/extractor/generic.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 37c40cb79..31ed63fcc 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -270,7 +270,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". 
Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.') general.add_option( '--ignore-config', action='store_true', diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 869efb215..f97b59845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -383,7 +383,7 @@ class GenericIE(InfoExtractor): if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'auto_warning' + default_search = 'error' if default_search in ('auto', 'auto_warning'): if '/' in url: @@ -397,8 +397,13 @@ class GenericIE(InfoExtractor): expected=True) else: self._downloader.report_warning( - 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) + 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) + elif default_search == 'error': + raise ExtractorError( + ('%r is not a valid URL. 
' + 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube' + ) % (url, url), expected=True) else: assert ':' in default_search return self.url_result(default_search + url) From 8d5797b00f2640cfc5d75ea0189e06d85a360639 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Jul 2014 11:28:51 +0200 Subject: [PATCH 37/89] [YoutubeDL] Show download URL when -v is set This will allow us to debug issues like #3204 --- youtube_dl/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dc0ba986a..3dff723b8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -993,6 +993,8 @@ class YoutubeDL(object): fd = get_suitable_downloader(info)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) if info_dict.get('requested_formats') is not None: downloaded = [] From 76bafa8ffe4631405bba17eb447f366b5c8ce734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Jul 2014 18:53:31 +0700 Subject: [PATCH 38/89] [newstube] Capture error message --- youtube_dl/extractor/newstube.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index 2fd5b8f04..a860350af 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ExtractorError class NewstubeIE(InfoExtractor): @@ -40,6 +41,10 @@ class NewstubeIE(InfoExtractor): def ns(s): return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'} + error_message = player.find(ns('./ErrorMessage')) + if error_message is not None: + raise ExtractorError('%s returned error: %s' % (self.IE_NAME, 
error_message.text), expected=True) + session_id = player.find(ns('./SessionId')).text media_info = player.find(ns('./Medias/MediaInfo')) title = media_info.find(ns('./Name')).text From 1fd015516e8bb276c798983c243d45b6dd5054dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Jul 2014 19:32:13 +0700 Subject: [PATCH 39/89] [newstube] Replace test --- youtube_dl/extractor/newstube.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index a860350af..551bd4d7a 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -10,13 +10,13 @@ from ..utils import ExtractorError class NewstubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P.+)' _TEST = { - 'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs', + 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', 'info_dict': { - 'id': 'd156a237-a6e9-4111-a682-039995f721f1', + 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', 'ext': 'flv', - 'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»', - 'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77', - 'duration': 20.04, + 'title': 'Телеканал CNN переместил город Славянск в Крым', + 'description': 'md5:419a8c9f03442bc0b0a794d689360335', + 'duration': 31.05, }, 'params': { # rtmp download From 459af43494bca29ee0f079965b102e55ff72c04a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 7 Jul 2014 14:10:57 +0200 Subject: [PATCH 40/89] [arte] Manually set the rtmp play_path (fix #3198) rtmpdump doesn't parse it right --- youtube_dl/extractor/arte.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b42102f3d..9591bad8a 100644 --- a/youtube_dl/extractor/arte.py +++ 
b/youtube_dl/extractor/arte.py @@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor): formats = [{ 'forma_id': q.attrib['quality'], - 'url': q.text, + # The playpath starts at 'mp4:', if we don't manually + # split the url, rtmpdump will incorrectly parse them + 'url': q.text.split('mp4:', 1)[0], + 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], 'ext': 'flv', 'quality': 2 if q.attrib['quality'] == 'hd' else 1, } for q in config.findall('./urls/url')] From 1aac03797ee43b40a410389aa3dfa4e4b2f2918d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Jul 2014 20:12:59 +0700 Subject: [PATCH 41/89] [ninegag] Fix extraction --- youtube_dl/extractor/ninegag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index c2e7b67c7..33daa0dec 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor): webpage = self._download_webpage(url, display_id) post_view = json.loads(self._html_search_regex( - r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view')) + r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) youtube_id = post_view['videoExternalId'] title = post_view['title'] From 3941669d691b337aa4bb1b13648a0573c37abd6e Mon Sep 17 00:00:00 2001 From: azeem Date: Mon, 7 Jul 2014 23:51:02 +0530 Subject: [PATCH 42/89] [soundcloud] Adding likes support to SoundcloudUserIE --- test/test_playlists.py | 8 ++++++++ youtube_dl/extractor/soundcloud.py | 15 +++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 994b1d4b0..3a88cf270 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -137,6 +137,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], '9615865') self.assertTrue(len(result['entries']) >= 12) + def 
test_soundcloud_likes(self): + dl = FakeYDL() + ie = SoundcloudUserIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band/likes') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '9615865') + self.assertTrue(len(result['entries']) >= 1) + def test_soundcloud_playlist(self): dl = FakeYDL() ie = SoundcloudPlaylistIE(dl) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7aa100fb2..14ec9452d 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -255,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): - _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)(/?(tracks/)?)?(\?.*)?$' + _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)/?((?Ptracks|likes)/?)?(\?.*)?$' IE_NAME = 'soundcloud:user' # it's in tests/test_playlists.py @@ -264,24 +264,31 @@ class SoundcloudUserIE(SoundcloudIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group('user') + resource = mobj.group('rsrc') + if resource is None: + resource = 'tracks' + elif resource == 'likes': + resource = 'favorites' url = 'http://soundcloud.com/%s/' % uploader resolv_url = self._resolv_url(url) user = self._download_json( resolv_url, uploader, 'Downloading user info') - base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader + base_url = 'http://api.soundcloud.com/users/%s/%s.json?' 
% (uploader, resource) entries = [] for i in itertools.count(): data = compat_urllib_parse.urlencode({ 'offset': i * 50, + 'limit': 50, 'client_id': self._CLIENT_ID, }) new_entries = self._download_json( base_url + data, uploader, 'Downloading track page %s' % (i + 1)) - entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) - if len(new_entries) < 50: + if len(new_entries) == 0: + self.to_screen('%s: End page received' % uploader) break + entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) return { '_type': 'playlist', From 6e1e0e4b5b1952b17007cf6489e0d3e2bc2a513a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jul 2014 20:22:27 +0700 Subject: [PATCH 43/89] [veoh] Skip deleted test video --- youtube_dl/extractor/veoh.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index fb132aef6..a7953a7e7 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -49,6 +49,7 @@ class VeohIE(InfoExtractor): 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', 'uploader': 'newsy-videos', }, + 'skip': 'This video has been deleted.', }, ] From d6aa1967ad5b91cb12b306a9797c7c5097d54472 Mon Sep 17 00:00:00 2001 From: MikeCol Date: Wed, 9 Jul 2014 12:14:53 +0200 Subject: [PATCH 44/89] GoshGay Extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/goshgay.py | 72 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/goshgay.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 12cca5c2e..e8598a2f5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -112,6 +112,7 @@ from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE +from .goshgay import GoshgayIE from .hark import HarkIE from .helsinki import 
HelsinkiIE from .hentaistigma import HentaiStigmaIE diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py new file mode 100644 index 000000000..3f31ec896 --- /dev/null +++ b/youtube_dl/extractor/goshgay.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + str_to_int, + ExtractorError, +) +import json + + +class GoshgayIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P\d+?)($|/)' + _TEST = { + 'url': 'http://www.goshgay.com/video4116282', + 'md5': '268b9f3c3229105c57859e166dd72b03', + 'info_dict': { + 'id': '4116282', + 'ext': 'flv', + 'title': 'md5:089833a4790b5e103285a07337f245bf', + 'thumbnail': 're:http://.*\.jpg', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'class="video-title">

      (.+?)<', webpage, 'title') + + player_config = self._search_regex(r'jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings', + fatal=True, flags=re.S) + player_vars = json.loads(player_config.replace("'", '"')) + width = str_to_int(player_vars.get('width')) + height = str_to_int(player_vars.get('height')) + config_uri = player_vars.get('config') + + if config_uri is None: + raise ExtractorError('Missing config URI') + node = self._download_xml(config_uri, video_id, 'Downloading player config XML', + errnote='Unable to download XML') + if node is None: + raise ExtractorError('Missing config XML') + if node.tag != 'config': + raise ExtractorError('Missing config attribute') + fns = node.findall('file') + imgs = node.findall('image') + if len(fns) != 1: + raise ExtractorError('Missing media URI') + video_url = fns[0].text + if len(imgs) < 1: + thumbnail = None + else: + thumbnail = imgs[0].text + + url_comp = compat_urlparse.urlparse(url) + ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'width': width, + 'height': height, + 'thumbnail': thumbnail, + 'http_referer': ref, + 'age_limit': 18, + } From 411f691b213f12d8020547316fb6c16239732a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 9 Jul 2014 19:12:42 +0700 Subject: [PATCH 45/89] [mpora] Fix player regex --- youtube_dl/extractor/mpora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 39d6feb98..387935d4d 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -28,7 +28,7 @@ class MporaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_json = self._search_regex( - r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') + r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') data = json.loads(data_json) From 
537ba6f3818004ef43e0067fd1be8dbd1bbeed46 Mon Sep 17 00:00:00 2001 From: pachacamac Date: Wed, 9 Jul 2014 18:21:46 +0200 Subject: [PATCH 46/89] [Vodlocker] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vodlocker.py | 66 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 youtube_dl/extractor/vodlocker.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3160df1e..1666aa372 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -333,6 +333,7 @@ from .vine import ( ) from .viki import VikiIE from .vk import VKIE +from .vodlocker import VodlockerIE from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py new file mode 100644 index 000000000..fdab0e7bf --- /dev/null +++ b/youtube_dl/extractor/vodlocker.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import time +from .common import InfoExtractor +from ..utils import ( + determine_ext, + compat_urllib_parse, + compat_urllib_request, +) + + +class VodlockerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P[0-9a-zA-Z]+)(?:\..*?)?' 
+ + _TESTS = [{ + 'url': 'http://vodlocker.com/e8wvyzz4sl42', + 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', + 'info_dict': { + 'id': 'e8wvyzz4sl42', + 'ext': 'mp4', + 'title': 'Germany vs Brazil', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://vodlocker.com/%s' % video_id + + webpage = self._download_webpage(url, video_id) + + fields = dict(re.findall(r'''(?x)\s*(.*?)\s* Date: Thu, 10 Jul 2014 14:49:16 +0200 Subject: [PATCH 47/89] release 2014.07.10 --- README.md | 5 +++-- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2bea609bf..dffdaa9dc 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like. --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large - apple". By default (with value "auto") - youtube-dl guesses. + apple". Use the value "auto" to let + youtube-dl guess. The default value "error" + just throws an error. --ignore-config Do not read configuration files. When given in the global configuration file /etc /youtube-dl.conf: do not read the user diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ab076489f..a8804b650 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.26' +__version__ = '2014.07.10' From b3a88780802a83686671945471d042dd864e7ccb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:23:17 +0200 Subject: [PATCH 48/89] [youtube] Remove static signatures They always fail by now. 
Instead, use only automatic signature extraction --- youtube_dl/extractor/youtube.py | 50 ++------------------------------- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f420b8148..15208f47f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -879,58 +879,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if self._downloader.params.get('youtube_print_sig_code'): self._print_sig_code(func, len(s)) return func(s) - except Exception: + except Exception as e: tb = traceback.format_exc() - self._downloader.report_warning( - u'Automatic signature extraction failed: ' + tb) - - self._downloader.report_warning( - u'Warning: Falling back to static signature algorithm') + raise ExtractorError( + u'Automatic signature extraction failed: ' + tb, cause=e) return self._static_decrypt_signature( s, video_id, player_url, age_gate) - def _static_decrypt_signature(self, s, video_id, player_url, age_gate): - if age_gate: - # The videos with age protection use another player, so the - # algorithms can be different. 
- if len(s) == 86: - return s[2:63] + s[82] + s[64:82] + s[63] - - if len(s) == 93: - return s[86:29:-1] + s[88] + s[28:5:-1] - elif len(s) == 92: - return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] - elif len(s) == 91: - return s[84:27:-1] + s[86] + s[26:5:-1] - elif len(s) == 90: - return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] - elif len(s) == 89: - return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] - elif len(s) == 88: - return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28] - elif len(s) == 87: - return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] - elif len(s) == 86: - return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] - elif len(s) == 85: - return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] - elif len(s) == 84: - return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] - elif len(s) == 83: - return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] - elif len(s) == 82: - return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] - elif len(s) == 81: - return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] - elif len(s) == 80: - return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] - elif len(s) == 79: - return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] - - else: - raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( From 6f9d4d542f8a5f565fe7811e6d07553f4d9c69cc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:34:01 +0200 Subject: [PATCH 49/89] [youtube] Add test for new signature scheme (#3232) --- test/test_youtube_signature.py | 12 
++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8417c55a6..6e0fa14a8 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [ 90, u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', + u'js', + u'BF51B8F76F05D81CEAED01F5ACE376131B23D830.5805F8368CB04C36C973A8CF997B774AC4B685B77', + u'2909FDCA8C5E6D92D34B34E7C7AFFD7CA57532DA.5BA2848AD58DAA15002012C7CD77187D24E048A5', + ), ] @@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, stype, sig_length, expected_sig): +def make_tfunc(url, stype, sig_input, expected_sig): basename = url.rpartition('/')[2] m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) assert m, '%r should follow URL format' % basename @@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig): with open(fn, 'rb') as testf: swfcode = testf.read() func = ie._parse_sig_swf(swfcode) - src_sig = compat_str(string.printable[:sig_length]) + src_sig = ( + compat_str(string.printable[:sig_input]) + if isinstance(sig_input, int) else sig_input) got_sig = func(src_sig) self.assertEqual(got_sig, expected_sig) From 61989fb5e9613b042c7f72d06e141242d60a1fde Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:40:02 +0200 Subject: [PATCH 50/89] [jsinterp] Remove superfluous u --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 449482d3c..d7e76713f 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -59,7 +59,7 @@ class JSInterpreter(object): if member == 'split("")': return list(val) if member == 'join("")': - return u''.join(val) + return ''.join(val) if member == 'length': return len(val) if member == 
'reverse()': From c8bf86d50d65ac434c7d683c21ec4d362f0cf030 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:44:39 +0200 Subject: [PATCH 51/89] [youtube] Correct signature extraction error detection --- youtube_dl/extractor/youtube.py | 39 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 15208f47f..6123e1256 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -865,27 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _decrypt_signature(self, s, video_id, player_url, age_gate=False): """Turn the encrypted s field into a working signature""" - if player_url is not None: - if player_url.startswith(u'//'): - player_url = u'https:' + player_url - try: - player_id = (player_url, len(s)) - if player_id not in self._player_cache: - func = self._extract_signature_function( - video_id, player_url, len(s) - ) - self._player_cache[player_id] = func - func = self._player_cache[player_id] - if self._downloader.params.get('youtube_print_sig_code'): - self._print_sig_code(func, len(s)) - return func(s) - except Exception as e: - tb = traceback.format_exc() - raise ExtractorError( - u'Automatic signature extraction failed: ' + tb, cause=e) + if player_url is None: + raise ExtractorError(u'Cannot decrypt signature without player_url') - return self._static_decrypt_signature( - s, video_id, player_url, age_gate) + if player_url.startswith(u'//'): + player_url = u'https:' + player_url + try: + player_id = (player_url, len(s)) + if player_id not in self._player_cache: + func = self._extract_signature_function( + video_id, player_url, len(s) + ) + self._player_cache[player_id] = func + func = self._player_cache[player_id] + if self._downloader.params.get('youtube_print_sig_code'): + self._print_sig_code(func, len(s)) + return func(s) + except Exception as e: + tb = 
traceback.format_exc() + raise ExtractorError( + u'Automatic signature extraction failed: ' + tb, cause=e) def _get_available_subtitles(self, video_id, webpage): try: From fc040bfd058202b0b8c9f69b12e3a2d32e8f380c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:44:56 +0200 Subject: [PATCH 52/89] [jsinterp] Prevent mis-recognitions of local functions --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index d7e76713f..3bbb07704 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -99,7 +99,7 @@ class JSInterpreter(object): def extract_function(self, funcname): func_m = re.search( - (r'(?:function %s|%s\s*=\s*function)' % ( + (r'(?:function %s|[{;]%s\s*=\s*function)' % ( re.escape(funcname), re.escape(funcname))) + r'\((?P[a-z,]+)\){(?P[^}]+)}', self.code) From f64ebfe3e542e9648b8f9f268457de949d494901 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:46:08 +0200 Subject: [PATCH 53/89] [youtube] Correct signature test --- test/test_youtube_signature.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 6e0fa14a8..8d46fe108 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -36,8 +36,8 @@ _TESTS = [ ( u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', u'js', - u'BF51B8F76F05D81CEAED01F5ACE376131B23D830.5805F8368CB04C36C973A8CF997B774AC4B685B77', - u'2909FDCA8C5E6D92D34B34E7C7AFFD7CA57532DA.5BA2848AD58DAA15002012C7CD77187D24E048A5', + u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', + u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', ), ] From 391d53e1ddb55928a2aa7735487e166e582af024 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:49:41 +0200 Subject: [PATCH 54/89] release 
2014.07.11 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a8804b650..d6b05892c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.10' +__version__ = '2014.07.11' From 4094b6e36d03a6230689657d87de7a58f3f0b581 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:57:08 +0200 Subject: [PATCH 55/89] [vodlocker] PEP8, generalization, and simplification (#3223) --- youtube_dl/extractor/common.py | 11 +++++++++-- youtube_dl/extractor/vodlocker.py | 19 +++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e4e4feef9..f1ed30704 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@ import base64 import hashlib import json +import netrc import os import re import socket import sys -import netrc +import time import xml.etree.ElementTree from ..utils import ( @@ -575,6 +576,13 @@ class InfoExtractor(object): else: return url + def _sleep(self, timeout, video_id, msg_template=None): + if msg_template is None: + msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' + msg = msg_template % {'video_id': video_id, 'timeout': timeout} + self.to_screen(msg) + time.sleep(timeout) + class SearchInfoExtractor(InfoExtractor): """ @@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor): @property def SEARCH_KEY(self): return self._SEARCH_KEY - diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index fdab0e7bf..dfc570930 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -28,9 +28,6 @@ class VodlockerIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - - url = 'http://vodlocker.com/%s' % video_id - webpage = 
self._download_webpage(url, video_id) fields = dict(re.findall(r'''(?x)\s*(.*?)\s*\s*(.*?)\s* Date: Fri, 11 Jul 2014 11:01:59 +0200 Subject: [PATCH 56/89] [goshgay] PEP8 and test for age_limit (#3220) --- youtube_dl/extractor/goshgay.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 3f31ec896..7bca21ad0 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -22,6 +22,7 @@ class GoshgayIE(InfoExtractor): 'ext': 'flv', 'title': 'md5:089833a4790b5e103285a07337f245bf', 'thumbnail': 're:http://.*\.jpg', + 'age_limit': 18, } } @@ -32,8 +33,8 @@ class GoshgayIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._search_regex(r'class="video-title">

      (.+?)<', webpage, 'title') - player_config = self._search_regex(r'jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings', - fatal=True, flags=re.S) + player_config = self._search_regex( + r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') player_vars = json.loads(player_config.replace("'", '"')) width = str_to_int(player_vars.get('width')) height = str_to_int(player_vars.get('height')) From 953b3586687f859d8b9fc7e8d9c155fb360ee587 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:05:16 +0200 Subject: [PATCH 57/89] [gorillavid] Add support for daclips.in (Closes #3213) --- youtube_dl/extractor/gorillavid.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index aa15cafc3..50ef54cce 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -12,7 +12,12 @@ from ..utils import ( class GorillaVidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' + IE_DESC = 'GorillaVid.in and daclips.in' + _VALID_URL = r'''(?x) + https?://(?:www\.)? + (?:daclips\.in|gorillavid\.in)/ + (?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? 
+ ''' _TESTS = [{ 'url': 'http://gorillavid.in/06y9juieqpmi', @@ -32,14 +37,20 @@ class GorillaVidIE(InfoExtractor): 'title': 'Say something nice', 'thumbnail': 're:http://.*\.jpg', }, + }, { + 'url': 'http://daclips.in/3rso4kdn6f9m', + 'info_dict': { + 'id': '3rso4kdn6f9m', + 'ext': 'mp4', + 'title': 'Micro Pig piglets ready on 16th July 2009', + 'thumbnail': 're:http://.*\.jpg', + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - url = 'http://gorillavid.in/%s' % video_id - webpage = self._download_webpage(url, video_id) fields = dict(re.findall(r'''(?x) Date: Fri, 11 Jul 2014 11:08:36 +0200 Subject: [PATCH 58/89] [vimple] Do not fail if duration is missing --- youtube_dl/extractor/vimple.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index f3a807cd3..86344849a 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals + import re import zlib import base64 import xml.etree.ElementTree from .common import InfoExtractor +from ..utils import int_or_none class VimpleIE(InfoExtractor): @@ -79,6 +81,6 @@ class VimpleIE(InfoExtractor): 'title': video.find('Title').text, 'formats': formats, 'thumbnail': video.find('Poster').get('url'), - 'duration': int(video.get('duration')), + 'duration': int_or_none(video.get('duration')), 'webpage_url': video.find('Share').get('videoPageUrl'), } From e93f4f7578955b2484fe1e8927a3b5dafd9d5b52 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:09:01 +0200 Subject: [PATCH 59/89] [vodlocker] Remove unused imports --- youtube_dl/extractor/vodlocker.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index dfc570930..68c59364b 100644 --- a/youtube_dl/extractor/vodlocker.py +++ 
b/youtube_dl/extractor/vodlocker.py @@ -2,10 +2,8 @@ from __future__ import unicode_literals import re -import time from .common import InfoExtractor from ..utils import ( - determine_ext, compat_urllib_parse, compat_urllib_request, ) From 1eb867f33fe8147cc959e7d2fcc0701a0489dc29 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:11:09 +0200 Subject: [PATCH 60/89] [vimple] Simplify and PEP8 --- youtube_dl/extractor/vimple.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 86344849a..33d370e1c 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -1,10 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import zlib import base64 +import re import xml.etree.ElementTree +import zlib from .common import InfoExtractor from ..utils import int_or_none @@ -38,21 +38,21 @@ class VimpleIE(InfoExtractor): }, ] - # http://jsunpack-n.googlecode.com/svn-history/r63/trunk/swf.py - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id - iframe = self._download_webpage(iframe_url, video_id, note='Downloading iframe', errnote='unable to fetch iframe') - player_url = self._html_search_regex(r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') + iframe = self._download_webpage( + iframe_url, video_id, + note='Downloading iframe', errnote='unable to fetch iframe') + player_url = self._html_search_regex( + r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') - player = self._request_webpage(player_url, video_id, note='Downloading swf player').read() + player = self._request_webpage( + player_url, video_id, note='Downloading swf player').read() - # http://stackoverflow.com/a/6804758 - # http://stackoverflow.com/a/12073686 player = zlib.decompress(player[8:]) xml_pieces = 
re.findall(b'([a-zA-Z0-9 =+/]{500})', player) From 3d55f2806ef23d567722ee61f7bf9d0662f81639 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:11:52 +0200 Subject: [PATCH 61/89] Credit @irtusb for vimple (#3073) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 31ed63fcc..24ccc9eb8 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -60,6 +60,7 @@ __authors__ = ( 'Georg Jähnig', 'Ralf Haring', 'Koki Takahashi', + 'Ariset Llerena', ) __license__ = 'Public Domain' From 04c77a54b0542b914a979d04bcc7b86dd375f828 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:15:35 +0200 Subject: [PATCH 62/89] [tenplay] PEP8 --- youtube_dl/extractor/tenplay.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py index 449351551..8477840fc 100644 --- a/youtube_dl/extractor/tenplay.py +++ b/youtube_dl/extractor/tenplay.py @@ -5,11 +5,12 @@ import re from .common import InfoExtractor + class TenPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+' _TEST = { 'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way', - 'md5': 'c9dda6aac8f814352ad2aee8899b1612', + #'md5': 'd68703d9f73dc8fccf3320ab34202590', 'info_dict': { 'id': '2695695426001', 'ext': 'flv', @@ -17,17 +18,28 @@ class TenPlayIE(InfoExtractor): 'description': 'Welcome to a new TV experience. 
Enjoy a taste of the TENplay benefits.', 'timestamp': 1380150606.889, 'upload_date': '20130925', - 'uploader': 'TENplay' + 'uploader': 'TENplay', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump } } - _video_fields = ["id","name","shortDescription","longDescription","creationDate","publishedDate","lastModifiedDate","customFields","videoStillURL","thumbnailURL","referenceId","length","playsTotal","playsTrailingWeek","renditions","captioning","startDate","endDate"] + _video_fields = [ + "id", "name", "shortDescription", "longDescription", "creationDate", + "publishedDate", "lastModifiedDate", "customFields", "videoStillURL", + "thumbnailURL", "referenceId", "length", "playsTotal", + "playsTrailingWeek", "renditions", "captioning", "startDate", "endDate"] def _real_extract(self, url): webpage = self._download_webpage(url, url) - video_id = self._html_search_regex(r'videoID: "(\d+?)"', webpage, 'video_id') - api_token = self._html_search_regex(r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') - title = self._html_search_regex(r'', webpage, 'title') + video_id = self._html_search_regex( + r'videoID: "(\d+?)"', webpage, 'video_id') + api_token = self._html_search_regex( + r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') + title = self._html_search_regex( + r'', + webpage, 'title') json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title) @@ -50,8 +62,8 @@ class TenPlayIE(InfoExtractor): 'ext': ext, 'vcodec': rendition['videoCodec'].lower(), 'container': rendition['videoContainer'].lower(), - 'url': url - }) + 'url': url, + }) return { 'id': video_id, From cdc22cb8861e95a874f0271c84dbc6be487e03fc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:16:04 +0200 Subject: [PATCH 63/89] Credit @adammw for tenplay (#2954) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) 
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 24ccc9eb8..c1f8a401e 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -61,6 +61,7 @@ __authors__ = ( 'Ralf Haring', 'Koki Takahashi', 'Ariset Llerena', + 'Adam Malcontenti-Wilson', ) __license__ = 'Public Domain' From d96b9d40f04110f427e5bbd2dcc75aeb375291c7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:27:44 +0200 Subject: [PATCH 64/89] [gameone] Sort formats --- youtube_dl/extractor/gameone.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 2544ea521..b580f52fb 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -76,6 +76,7 @@ class GameOneIE(InfoExtractor): } for r in rendition_items ] + self._sort_formats(formats) return { 'id': video_id, From 1df0ae217055c5af5e4ca9904d9d77a41b828f86 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:29:17 +0200 Subject: [PATCH 65/89] Credit @tobidope for gameone (#2941) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c1f8a401e..e55cba9f4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -62,6 +62,7 @@ __authors__ = ( 'Koki Takahashi', 'Ariset Llerena', 'Adam Malcontenti-Wilson', + 'Tobias Bell', ) __license__ = 'Public Domain' From fada438acf7220fbff6450800833585d0b0a1843 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:53:28 +0200 Subject: [PATCH 66/89] release 2014.07.11.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d6b05892c..ac3f72d5b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11' +__version__ = '2014.07.11.1' From 4e415288d73f3ec15a0b2854de79c7359d1ae6fe Mon Sep 17 00:00:00 2001 From: Philipp 
Hagemeister Date: Fri, 11 Jul 2014 13:21:32 +0200 Subject: [PATCH 67/89] [criterion] Simplify and modernize --- youtube_dl/extractor/criterion.py | 51 ++++++++++++++++--------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index 31fe3d57b..4fb178165 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,40 +1,43 @@ # -*- coding: utf-8 -*- +from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import determine_ext + class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' + _VALID_URL = r'https?://www\.criterion\.com/films/(?P[0-9]+)-.+' _TEST = { - u'url': u'http://www.criterion.com/films/184-le-samourai', - u'file': u'184.mp4', - u'md5': u'bc51beba55685509883a9a7830919ec3', - u'info_dict': { - u"title": u"Le Samouraï", - u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', + 'url': 'http://www.criterion.com/films/184-le-samourai', + 'md5': 'bc51beba55685509883a9a7830919ec3', + 'info_dict': { + 'id': '184', + 'ext': 'mp4', + 'title': 'Le Samouraï', + 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', - webpage, 'video url') - title = self._html_search_regex(r'', - webpage, 'video title') - description = self._html_search_regex(r'', - webpage, 'video description') - thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', - webpage, 'thumbnail url') + final_url = self._search_regex( + r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + title = self._og_search_title(webpage) + description = self._html_search_regex( + r'', + webpage, 'video description') + 
thumbnail = self._search_regex( + r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + webpage, 'thumbnail url') - return {'id': video_id, - 'url' : final_url, - 'title': title, - 'ext': determine_ext(final_url), - 'description': description, - 'thumbnail': thumbnail, - } + return { + 'id': video_id, + 'url': final_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } From 38ad119f97cba871d34b057050547ba56b3e54c6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 13:34:19 +0200 Subject: [PATCH 68/89] [screencast] Add new extractor (Fixes #3236) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/screencast.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/screencast.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fcc7d0b58..15d2f0e2a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -248,6 +248,7 @@ from .rutube import ( from .rutv import RUTVIE from .savefrom import SaveFromIE from .scivee import SciVeeIE +from .screencast import ScreencastIE from .servingsys import ServingSysIE from .sina import SinaIE from .slideshare import SlideshareIE diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py new file mode 100644 index 000000000..f2ced39c4 --- /dev/null +++ b/youtube_dl/extractor/screencast.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_parse_qs, + compat_urllib_request, +) + + +class ScreencastIE(InfoExtractor): + _VALID_URL = r'https?://www\.screencast\.com/t/(?P[a-zA-Z0-9]+)' + _TEST = { + 'url': 'http://www.screencast.com/t/3ZEjQXlT', + 'md5': '917df1c13798a3e96211dd1561fded83', + 'info_dict': { + 'id': '3ZEjQXlT', + 'ext': 'm4v', + 'title': 'Color Measurement with Ocean Optics Spectrometers', + 'description': 
'md5:240369cde69d8bed61349a199c5fb153', + 'thumbnail': 're:^https?://.*\.jpg$' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + flash_vars_s = self._html_search_regex( + r' Date: Fri, 11 Jul 2014 13:34:48 +0200 Subject: [PATCH 69/89] release 2014.07.11.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ac3f72d5b..7ea6e7d43 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11.1' +__version__ = '2014.07.11.2' From 40c696e5c6e01bd94ae0d5f17ef77c368588106c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 15:38:18 +0200 Subject: [PATCH 70/89] [screencast] Add suppot for more video types (#3236) --- youtube_dl/extractor/common.py | 4 +- youtube_dl/extractor/screencast.py | 69 +++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f1ed30704..e68657314 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -463,14 +463,14 @@ class InfoExtractor(object): def _og_search_url(self, html, **kargs): return self._og_search_property('url', html, **kargs) - def _html_search_meta(self, name, html, display_name=None, fatal=False): + def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): if display_name is None: display_name = name return self._html_search_regex( r'''(?ix)]+(?:itemprop|name|property)=["\']%s["\']) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), - html, display_name, fatal=fatal) + html, display_name, fatal=fatal, **kwargs) def _dc_search_uploader(self, html): return self._html_search_meta('dc.creator', html, 'uploader') diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index 
f2ced39c4..ba69739b2 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, compat_parse_qs, compat_urllib_request, ) @@ -12,7 +13,7 @@ from ..utils import ( class ScreencastIE(InfoExtractor): _VALID_URL = r'https?://www\.screencast\.com/t/(?P[a-zA-Z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.screencast.com/t/3ZEjQXlT', 'md5': '917df1c13798a3e96211dd1561fded83', 'info_dict': { @@ -20,24 +21,70 @@ class ScreencastIE(InfoExtractor): 'ext': 'm4v', 'title': 'Color Measurement with Ocean Optics Spectrometers', 'description': 'md5:240369cde69d8bed61349a199c5fb153', - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', } - } + }, { + 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', + 'md5': 'e8e4b375a7660a9e7e35c33973410d34', + 'info_dict': { + 'id': 'V2uXehPJa1ZI', + 'ext': 'mov', + 'title': 'The Amadeus Spectrometer', + 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/aAB3iowa', + 'md5': 'dedb2734ed00c9755761ccaee88527cd', + 'info_dict': { + 'id': 'aAB3iowa', + 'ext': 'mp4', + 'title': 'Google Earth Export', + 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - flash_vars_s = self._html_search_regex( - r'>(.*?)<', + webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage, default=None) + if description is None: + description = self._html_search_meta('description', webpage) return { 'id': video_id, From 133af9385b1a8ae593718561ab7b92cc52332016 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 11 Jul 2014 16:16:30 +0200 Subject: [PATCH 71/89] Update supported formats for the --recode-video option (#3228) --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e55cba9f4..89a2cb3e8 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -509,7 +509,7 @@ def parseOpts(overrideArguments=None): postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, - help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)') + help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, help='keeps the video file on disk after the post-processing; the video is erased by default') postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, From 00ac799b6875c14886d18328c8a6563f751127a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Jul 2014 22:04:24 +0700 Subject: [PATCH 72/89] [vine:user] Update test --- test/test_playlists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 3a88cf270..1a38a667b 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -111,7 +111,7 @@ class TestPlaylists(unittest.TestCase): ie = VineUserIE(dl) result = ie.extract('https://vine.co/Visa') self.assertIsPlaylist(result) - self.assertTrue(len(result['entries']) >= 50) + self.assertTrue(len(result['entries']) 
>= 47) def test_ustream_channel(self): dl = FakeYDL() From 345e37831c6d6e215986f956f5dbf0578773ed38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Jul 2014 22:08:04 +0700 Subject: [PATCH 73/89] [youtube] Update nosubtitles test --- test/test_subtitles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 5736fe581..48c302198 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -87,7 +87,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles): def test_youtube_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') - self.url = 'sAjKT8FhjI8' + self.url = 'n5BB19UTcdA' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() From 09018e19a596f3a39bf7d871d8bb14c185b2470b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 17:21:16 +0200 Subject: [PATCH 74/89] release 2014.07.11.3 --- README.md | 2 +- youtube_dl/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dffdaa9dc..bc5e0f76d 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like. 
128K (default 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: - mp4|flv|ogg|webm) + mp4|flv|ogg|webm|mkv) -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7ea6e7d43..2c9591630 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11.2' +__version__ = '2014.07.11.3' From aaefb347c0177d0b3f3fe6ade08fe4657479ee4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Jul 2014 22:23:00 +0700 Subject: [PATCH 75/89] [gorillavid] Fix embedded videos extraction --- youtube_dl/extractor/gorillavid.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 50ef54cce..ca5f7c417 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -14,8 +14,8 @@ from ..utils import ( class GorillaVidIE(InfoExtractor): IE_DESC = 'GorillaVid.in and daclips.in' _VALID_URL = r'''(?x) - https?://(?:www\.)? - (?:daclips\.in|gorillavid\.in)/ + https?://(?P(?:www\.)? + (?:daclips\.in|gorillavid\.in))/ (?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? 
''' @@ -39,6 +39,7 @@ class GorillaVidIE(InfoExtractor): }, }, { 'url': 'http://daclips.in/3rso4kdn6f9m', + 'md5': '1ad8fd39bb976eeb66004d3a4895f106', 'info_dict': { 'id': '3rso4kdn6f9m', 'ext': 'mp4', @@ -51,7 +52,7 @@ class GorillaVidIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id) fields = dict(re.findall(r'''(?x) Date: Fri, 11 Jul 2014 22:52:48 +0700 Subject: [PATCH 76/89] [screencast] Add one more format and improve title extraction --- youtube_dl/extractor/screencast.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index ba69739b2..306869e6a 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -43,6 +43,16 @@ class ScreencastIE(InfoExtractor): 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', } + }, { + 'url': 'http://www.screencast.com/t/X3ddTrYh', + 'md5': '669ee55ff9c51988b4ebc0877cc8b159', + 'info_dict': { + 'id': 'X3ddTrYh', + 'ext': 'wmv', + 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', + 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } }, ] @@ -59,6 +69,12 @@ class ScreencastIE(InfoExtractor): flash_vars_s = self._html_search_regex( r'>(.*?)<', + [r'Title: ([^<]*)', + r'class="tabSeperator">>(.*?)<'], webpage, 'title') thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage, default=None) From 678f58de4bf8c07116e4ea2255770ab0ba665c14 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sat, 12 Jul 2014 00:42:42 +0300 Subject: [PATCH 77/89] [firedrive] Add new extractor. 
Addresses #3095 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/firedrive.py | 81 +++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 youtube_dl/extractor/firedrive.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 15d2f0e2a..c215811c3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -83,6 +83,7 @@ from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE +from .firedrive import FiredriveIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py new file mode 100644 index 000000000..1d83048e8 --- /dev/null +++ b/youtube_dl/extractor/firedrive.py @@ -0,0 +1,81 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_parse, + compat_urllib_request, + determine_ext, +) + + +class FiredriveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \ + '(?:file|embed)/(?P[0-9a-zA-Z]+)' + _FILE_DELETED_REGEX = r'
      ' + + _TESTS = [{ + 'url': 'https://www.firedrive.com/file/FEB892FA160EBD01', + 'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970', + 'info_dict': { + 'id': 'FEB892FA160EBD01', + 'ext': 'flv', + 'title': 'bbb_theora_486kbit.flv', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://firedrive.com/file/%s' % video_id + + webpage = self._download_webpage(url, video_id) + + if re.search(self._FILE_DELETED_REGEX, webpage) is not None: + raise ExtractorError(u'Video %s does not exist' % video_id, + expected=True) + + fields = dict(re.findall(r'''(?x)(.+)
      ', + webpage, 'title') + thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, + 'thumbnail', fatal=False, default="") + url = self._search_regex(r'file:\s?\'(http[^\']+)\',', + webpage, 'file url') + ext = self._search_regex(r'type:\s?\'([^\']+)\',', + webpage, 'extension', fatal=False) + + formats = [{ + 'format_id': 'sd', + 'url': url, + 'ext': ext or determine_ext(url), + 'quality': 1, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': "http:" + thumbnail, + 'formats': formats, + } From 0d90e0f067842d35ec802cff4fcbd882023135fe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 12 Jul 2014 14:23:54 +0200 Subject: [PATCH 78/89] Credit @naglis for firedrive (#3242) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 89a2cb3e8..5e16a5491 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -63,6 +63,7 @@ __authors__ = ( 'Ariset Llerena', 'Adam Malcontenti-Wilson', 'Tobias Bell', + 'Naglis Jonaitis', ) __license__ = 'Public Domain' From c993c829e22cec2e1424ff45deedeecc9638bd5e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 12 Jul 2014 14:27:14 +0200 Subject: [PATCH 79/89] [firedrive] Simplify --- youtube_dl/extractor/firedrive.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py index 1d83048e8..d26145db1 100644 --- a/youtube_dl/extractor/firedrive.py +++ b/youtube_dl/extractor/firedrive.py @@ -24,7 +24,7 @@ class FiredriveIE(InfoExtractor): 'id': 'FEB892FA160EBD01', 'ext': 'flv', 'title': 'bbb_theora_486kbit.flv', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': 're:^http://.*\.jpg$', }, }] @@ -37,7 +37,7 @@ class FiredriveIE(InfoExtractor): webpage = self._download_webpage(url, video_id) if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError(u'Video %s does not exist' % 
video_id, + raise ExtractorError('Video %s does not exist' % video_id, expected=True) fields = dict(re.findall(r'''(?x)(.+)', webpage, 'title') thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, - 'thumbnail', fatal=False, default="") - url = self._search_regex(r'file:\s?\'(http[^\']+)\',', - webpage, 'file url') + 'thumbnail', fatal=False) + if thumbnail is not None: + thumbnail = 'http:' + thumbnail + ext = self._search_regex(r'type:\s?\'([^\']+)\',', webpage, 'extension', fatal=False) + video_url = self._search_regex( + r'file:\s?\'(http[^\']+)\',', webpage, 'file url') formats = [{ 'format_id': 'sd', - 'url': url, - 'ext': ext or determine_ext(url), - 'quality': 1, + 'url': video_url, + 'ext': ext, }] return { 'id': video_id, 'title': title, - 'thumbnail': "http:" + thumbnail, + 'thumbnail': thumbnail, 'formats': formats, } From 34dbcb8505897ffc91197e6db909bf38d390475e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Jul 2014 22:08:33 +0700 Subject: [PATCH 80/89] [ndr] Replace 404 test --- youtube_dl/extractor/ndr.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 3d6096e46..94d5ba982 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -18,15 +18,15 @@ class NDRIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html', - 'md5': 'e7a6079ca39d3568f4996cb858dd6708', + 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html', + 'md5': '4a4eeafd17c3058b65f0c8f091355855', 'note': 'Video file', 'info_dict': { - 'id': '7959', + 'id': '325', 'ext': 'mp4', - 'title': 'Markt - die ganze Sendung', - 'description': 'md5:af9179cf07f67c5c12dc6d9997e05725', - 'duration': 2655, + 'title': 'Blaue Bohnen aus Blocken', + 'description': 'md5:190d71ba2ccddc805ed01547718963bc', + 'duration': 1715, }, }, { From 81650f95e2d28f4acc8a864c5221f1e95f75adda Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Jul 2014 04:03:22 +0700 Subject: [PATCH 81/89] [ruhd] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ruhd.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/ruhd.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c215811c3..e89a83e32 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -240,6 +240,7 @@ from .rtbf import RTBFIE from .rtlnow import RTLnowIE from .rts import RTSIE from .rtve import RTVEALaCartaIE +from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py new file mode 100644 index 000000000..55b58e5e6 --- /dev/null +++ b/youtube_dl/extractor/ruhd.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RUHDIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P\d+)' + _TEST = { + 'url': 'http://www.ruhd.ru/play.php?vid=207', + 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', + 'info_dict': { + 'id': '207', + 'ext': 'divx', + 'title': 'КОТ бааааам', + 'description': 'классный кот)', + 'thumbnail': 're:^http://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!', webpage, 'title') + description = self._html_search_regex( + r'(?s)
      (.+?)', webpage, 'description', fatal=False) + thumbnail = self._html_search_regex( + r' Date: Thu, 10 Jul 2014 04:10:02 +0200 Subject: [PATCH 82/89] [ReverbNation] Add new IE - closes #2250 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/reverbnation.py | 45 ++++++++++++++++++++++++++++ youtube_dl/utils.py | 2 +- 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/reverbnation.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e89a83e32..a03f9d3ad 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -232,6 +232,7 @@ from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE +from .reverbnation import ReverbNationIE from .ringtv import RingTVIE from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py new file mode 100644 index 000000000..49cf427a1 --- /dev/null +++ b/youtube_dl/extractor/reverbnation.py @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +import re +import time + +from .common import InfoExtractor +from ..utils import strip_jsonp + + +class ReverbNationIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' + _TESTS = [{ + 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', + 'file': '16965047.mp3', + 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'info_dict': { + "title": "MONA LISA", + "uploader": "ALKILADOS", + "uploader_id": 216429, + "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + song_id = mobj.group('id') + + api_res = self._download_json( + 'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' + % (song_id, 
int(time.time() * 1000)), + song_id, + transform_source=strip_jsonp, + note='Downloading information of song %s' % song_id + ) + + return { + 'id': song_id, + 'title': api_res.get('name'), + 'url': api_res.get('url'), + 'uploader': api_res.get('artist', {}).get('name'), + 'uploader_id': api_res.get('artist', {}).get('id'), + 'thumbnail': api_res.get('image', api_res.get('thumbnail')), + 'ext': 'mp3', + 'vcodec': 'none', + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2cba2bfc1..a2890b764 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1428,7 +1428,7 @@ US_RATINGS = { def strip_jsonp(code): - return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code) + return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\)\s*?\s*$', r'\1', code) def qualities(quality_ids): From 6a46dc8db7c5d71107cc555a0f178c7c26c109d6 Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Sun, 13 Jul 2014 12:48:30 +1000 Subject: [PATCH 83/89] Add southpark.cc.com to southpark IE --- youtube_dl/extractor/southparkstudios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index aea8e6439..e2df242c5 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkStudiosIE(MTVServicesInfoExtractor): IE_NAME = 'southparkstudios.com' - _VALID_URL = r'https?://(www\.)?(?Psouthparkstudios\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' + _VALID_URL = r'https?://(www\.)?(?P(?:southpark\.cc|southparkstudios)\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' From b1298d8e064e3c1d31bdfffe8a3b5cfed8b0b61d Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Sun, 13 Jul 2014 21:15:18 +1000 Subject: [PATCH 84/89] Test for colon in mgid --- youtube_dl/extractor/mtv.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index af9490ccc..228b42d2b 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -158,6 +158,9 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid.endswith('.swf'): mgid = mgid[:-4] except RegexNotFoundError: + mgid = None + + if mgid is None or ':' not in mgid: mgid = self._search_regex( [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], webpage, u'mgid') From 3804b012760dcc512322b49c7ae1dc4b8231b0db Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Sun, 13 Jul 2014 21:29:04 +1000 Subject: [PATCH 85/89] Update test --- youtube_dl/extractor/southparkstudios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index e2df242c5..6955269f7 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -14,7 +14,7 @@ class SouthParkStudiosIE(MTVServicesInfoExtractor): 'info_dict': { 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', - 'title': 'Bat Daded', + 'title': 'South Park|Bat Daded', 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', }, }] From 10d00a756aa79a5f5e56ea75fd8d80aff3cb2b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 13 Jul 2014 14:08:23 +0200 Subject: [PATCH 86/89] rename southparkstudios.py to southpark.py And make the extractor only recognize southpark.cc.com urls, the old urls are redirected. 
--- youtube_dl/extractor/__init__.py | 4 ++-- .../extractor/{southparkstudios.py => southpark.py} | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) rename youtube_dl/extractor/{southparkstudios.py => southpark.py} (75%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a03f9d3ad..e49ac3e52 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -270,8 +270,8 @@ from .soundcloud import ( SoundcloudPlaylistIE ) from .soundgasm import SoundgasmIE -from .southparkstudios import ( - SouthParkStudiosIE, +from .southpark import ( + SouthParkIE, SouthparkDeIE, ) from .space import SpaceIE diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southpark.py similarity index 75% rename from youtube_dl/extractor/southparkstudios.py rename to youtube_dl/extractor/southpark.py index 6955269f7..c20397b3d 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southpark.py @@ -3,14 +3,14 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -class SouthParkStudiosIE(MTVServicesInfoExtractor): - IE_NAME = 'southparkstudios.com' - _VALID_URL = r'https?://(www\.)?(?P(?:southpark\.cc|southparkstudios)\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' +class SouthParkIE(MTVServicesInfoExtractor): + IE_NAME = 'southpark.cc.com' + _VALID_URL = r'https?://(www\.)?(?Psouthpark\.cc\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _TESTS = [{ - 'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', + 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', 'info_dict': { 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', @@ -20,7 +20,7 @@ class SouthParkStudiosIE(MTVServicesInfoExtractor): }] -class SouthparkDeIE(SouthParkStudiosIE): +class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' _VALID_URL = 
r'https?://(www\.)?(?Psouthpark\.de/(clips|alle-episoden)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' From 9dcea3998565838af1a0821929d7d149ae658971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 13 Jul 2014 14:38:26 +0200 Subject: [PATCH 87/89] [tlc.de] If the url contains a fragment, use if in the iframe url (reported in #2748) The fragment is used in the webpage for selecting different videos. --- youtube_dl/extractor/tlc.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index ad175b83e..d848ee186 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from .brightcove import BrightcoveIE from .discovery import DiscoveryIE +from ..utils import compat_urlparse class TlcIE(DiscoveryIE): @@ -51,6 +52,10 @@ class TlcDeIE(InfoExtractor): # Otherwise we don't get the correct 'BrightcoveExperience' element, # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/ iframe_url = iframe_url.replace('.htm?', '.php?') + url_fragment = compat_urlparse.urlparse(url).fragment + if url_fragment: + # Since the fragment is not send to the server, we always get the same iframe + iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url) iframe = self._download_webpage(iframe_url, title) return { From 76233cda34a3795b405cd0b2ded14fc38930263f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Jul 2014 00:38:10 +0700 Subject: [PATCH 88/89] [pyvideo] Fix title extraction --- youtube_dl/extractor/pyvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 0bc0859b4..6d5732d45 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -46,7 +46,7 @@ class 
PyvideoIE(InfoExtractor): return self.url_result(m_youtube.group(1), 'Youtube') title = self._html_search_regex( - r'
      .*?([^>]+?)

      ', + r'
      \s*]*)?>([^>]+?)

      ', webpage, 'title', flags=re.DOTALL) video_url = self._search_regex( [r'Download.*? Date: Mon, 14 Jul 2014 00:41:23 +0200 Subject: [PATCH 89/89] Fix utils.strip_jsonp --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a2890b764..64a9618ca 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1428,7 +1428,7 @@ US_RATINGS = { def strip_jsonp(code): - return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\)\s*?\s*$', r'\1', code) + return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code) def qualities(quality_ids):