From 3ee4b60d564718cae7a288c4dc53bea9bb9589bb Mon Sep 17 00:00:00 2001 From: Ralf Haring Date: Fri, 16 May 2014 18:15:02 -0400 Subject: [PATCH 001/340] [vh1] Add new extractor (#2072) --- youtube_dl/extractor/__init__.py | 6 ++ youtube_dl/extractor/mtv.py | 2 + youtube_dl/extractor/vh1.py | 121 +++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 youtube_dl/extractor/vh1.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..3e3d99b3e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -294,6 +294,12 @@ from .veehd import VeeHDIE from .veoh import VeohIE from .vesti import VestiIE from .vevo import VevoIE +from .vh1 import ( + VH1EpisodeIE, + VH1ClipIE, + VH1ShortUrlIE, + VH1MusicVideoIE +) from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index d75241d3f..642aae811 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -80,6 +80,8 @@ class MTVServicesInfoExtractor(InfoExtractor): }) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') + # worst format is expected to be first and best one last + formats.sort(key=lambda x: int(x['format_id'])) return formats def _get_video_info(self, itemdoc): diff --git a/youtube_dl/extractor/vh1.py b/youtube_dl/extractor/vh1.py new file mode 100644 index 000000000..0e30d7bde --- /dev/null +++ b/youtube_dl/extractor/vh1.py @@ -0,0 +1,121 @@ +from .mtv import MTVIE +import re +from ..utils import fix_xml_ampersands + +class VH1IE(MTVIE): + IE_NAME = u'vh1.com' + _FEED_URL = 'http://www.vh1.com/player/embed/AS3/fullepisode/rss/' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + idoc = self._download_xml( + self._FEED_URL + '?id=' + video_id, video_id, + 'Downloading info', transform_source=fix_xml_ampersands) + return [self._get_video_info(item) for item in idoc.findall('.//item')] + + +class VH1EpisodeIE(VH1IE): + _VALID_URL = r'https?://www\.vh1\.com/video/.+?/full-episodes/.+?/(?P[^/]+)/playlist\.jhtml' + _TESTS = [ + { + u'url': u'http://www.vh1.com/video/metal-evolution/full-episodes/progressive-metal/1678612/playlist.jhtml', + u'playlist': [ + { + u'info_dict': { + u'id': u'731565', + u'ext': u'mp4', + u'title': u'Metal Evolution: Ep. 11 Act 1', + u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 12 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + }, + { + u'info_dict': { + u'id': u'731567', + u'ext': u'mp4', + u'title': u'Metal Evolution: Ep. 11 Act 2', + u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + }, + + { + u'info_dict': { + u'id': u'731568', + u'ext': u'mp4', + u'title': u'Metal Evolution: Ep. 11 Act 3', + u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + }, + { + u'info_dict': { + u'id': u'731569', + u'ext': u'mp4', + u'title': u'Metal Evolution: Ep. 11 Act 4', + u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + }, + { + u'info_dict': { + u'id': u'731570', + u'ext': u'mp4', + u'title': u'Metal Evolution: Ep. 11 Act 5', + u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + } + ] + } + ] + + +class VH1ClipIE(VH1IE): + _VALID_URL = r'https?://www\.vh1\.com/video/misc/.+?/.+?\.jhtml#id=(?P[^/]+)$' + _TESTS = [ + { + u'url': u'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118', + u'info_dict': { + u'id': u'706675', + u'ext': u'mp4', + u'title': u'Metal Evolution: Episode 1 Pre-Metal Show Clip', + u'description': u'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.' + } + } + ] + + +class VH1ShortUrlIE(VH1IE): + _VALID_URL = r'https?://www\.vh1\.com/video/play.jhtml\?id=(?P[^/]+)$' + _TESTS = [ + { + u'url': u'http://www.vh1.com/video/play.jhtml?id=1678353', + u'info_dict': { + u'id': u'730355', + u'ext': u'mp4', + u'title': u'Metal Evolution: Episode 11 Progressive Metal Sneak', + u'description': u'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.' + } + } + ] + + +class VH1MusicVideoIE(VH1IE): + _VALID_URL = r'https?://www\.vh1\.com/video/.+?/(?P[^/]+)/.+?$' + _TESTS = [ + { + u'url': u'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml', + u'info_dict': { + u'id': u'900535', + u'ext': u'mp4', + u'title': u'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"', + u'description': u'The Heist' + } + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('videoid') + # difference from VH1IE._real_extract() is "vid" param instead of "id" + idoc = self._download_xml( + self._FEED_URL + '?vid=' + video_id, video_id, + 'Downloading info', transform_source=fix_xml_ampersands) + return [self._get_video_info(item) for item in idoc.findall('.//item')] From 34d863f3fc104f58c9fd35298d33714c2c57a9f1 Mon Sep 17 00:00:00 2001 From: Ralf Haring Date: Fri, 16 May 2014 23:49:41 -0400 Subject: [PATCH 002/340] [vh1] use standard sort (#2072) --- youtube_dl/extractor/mtv.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 642aae811..e5ca41b40 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -80,8 +80,7 @@ class MTVServicesInfoExtractor(InfoExtractor): }) except (KeyError, TypeError): raise ExtractorError('Invalid rendition field.') - # worst format is expected to be first and best one last - formats.sort(key=lambda x: int(x['format_id'])) + self._sort_formats(formats) return formats def _get_video_info(self, itemdoc): From 412f356e04b0daaa1a862f8fdc155ae63376e7d2 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 14:47:23 +0200 Subject: [PATCH 003/340] [gameone] Add new extractor gameone Currently only usable for downloading tv episodes residing under http://www.gameone.de/tv/ --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gameone.py | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 youtube_dl/extractor/gameone.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..a294f66ae 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -103,6 +103,7 @@ from .freesound import FreesoundIE from .freespeech import FreespeechIE from .funnyordie import FunnyOrDieIE from .gamekings import GamekingsIE +from .gameone import GameOneIE from .gamespot import GameSpotIE from .gametrailers import GametrailersIE from .gdcvault import GDCVaultIE diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py new file mode 100644 index 000000000..a8a290477 --- /dev/null +++ b/youtube_dl/extractor/gameone.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import xpath_with_ns + +NAMESPACE_MAP = { + 'media': 'http://search.yahoo.com/mrss/', +} + +RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' + +class GameOneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' + _TESTS = { + 'url': 'http://www.gameone.de/tv/288', + 'md5': '136656b7fb4c9cb4a8e2d500651c499b', + 'info_dict': { + 'id': '288', + 'ext': 'mp4', + 'title': 'Game One - Folge 288', + 'duration': 1238, + 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + og_video = self._og_search_video_url(webpage, secure=False) + mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') + + mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') + title = mrss.find('.//item/title').text + thumbnail = mrss.find('.//item/image').get('url') + content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) + content_url = content.get('url') + + content = self._download_xml(content_url, video_id, 'Downloading media:content') + rendition_items = content.findall('.//rendition') + duration = int(rendition_items[0].get('duration')) + formats = [ + { + 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), + 'width': int(r.get('width')), + 'height': int(r.get('height')), + 'tbr': int(r.get('bitrate')), + } + for r in rendition_items + ] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } From 10d5c7aa5fcc4a05b039cc147b3e36732a56b0d2 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 15:10:19 +0200 Subject: [PATCH 004/340] [gameone] Added explanation for usage of http://cdn.riptide-mtvn.com/ --- youtube_dl/extractor/gameone.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index a8a290477..d5fb19cec 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -10,6 +10,8 @@ NAMESPACE_MAP = { 'media': 'http://search.yahoo.com/mrss/', } +# URL prefix to download the mp4 files directly instead of streaming via rtmp +# Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' class GameOneIE(InfoExtractor): From 9e30092361c3b94d66bf2aaf99087d0df201718c Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 17:07:40 +0200 Subject: [PATCH 005/340] [gameone] Added extraction of description and fixed failing tests --- youtube_dl/extractor/gameone.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index d5fb19cec..855df74fb 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import re +import xml.etree.ElementTree as ET from .common import InfoExtractor from ..utils import xpath_with_ns @@ -16,7 +17,7 @@ RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' - _TESTS = { + _TEST = { 'url': 'http://www.gameone.de/tv/288', 'md5': '136656b7fb4c9cb4a8e2d500651c499b', 'info_dict': { @@ -25,6 +26,11 @@ class GameOneIE(InfoExtractor): 'title': 'Game One - Folge 288', 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', + 'description': 'Puh, das ist ja wieder eine volle Packung! Erst begleiten wir Nils zum ' + 'FIFA-Pressepokal 2014, den er nach 2010 nun zum zweiten Mal gewinnen will.\n' + 'Danach gibt’s eine Vorschau auf die drei kommenden Hits “Star Citizen”, “Kingdom Come: Deliverance” und “Project Cars”.\n' + 'Und dann geht’s auch schon weiter mit der nächsten Folge vom Nerdquiz! Der schöne Trant foltert seine Kandidaten wieder ' + 'mit fiesen Fragen. Hier gibt’s die erste Hälfte, in Folge 289 geht’s weiter.' } } @@ -39,6 +45,7 @@ class GameOneIE(InfoExtractor): mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') + description = self._extract_description(mrss) content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -61,4 +68,9 @@ class GameOneIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, + 'description': description, } + + def _extract_description(self, mrss): + description = mrss.find('.//item/description') + return u''.join(t for t in description.itertext()) From a84d20fc14eb70310af85da385c879c365fd7897 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 18:20:29 +0200 Subject: [PATCH 006/340] [gameone] Simplified extraction of description --- youtube_dl/extractor/gameone.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 855df74fb..aa0234346 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -26,11 +26,7 @@ class GameOneIE(InfoExtractor): 'title': 'Game One - Folge 288', 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', - 'description': 'Puh, das ist ja wieder eine volle Packung! Erst begleiten wir Nils zum ' - 'FIFA-Pressepokal 2014, den er nach 2010 nun zum zweiten Mal gewinnen will.\n' - 'Danach gibt’s eine Vorschau auf die drei kommenden Hits “Star Citizen”, “Kingdom Come: Deliverance” und “Project Cars”.\n' - 'Und dann geht’s auch schon weiter mit der nächsten Folge vom Nerdquiz! Der schöne Trant foltert seine Kandidaten wieder ' - 'mit fiesen Fragen. Hier gibt’s die erste Hälfte, in Folge 289 geht’s weiter.' + 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', } } @@ -40,12 +36,12 @@ class GameOneIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) + description = self._html_search_meta('description', webpage) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') - description = self._extract_description(mrss) content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -70,7 +66,3 @@ class GameOneIE(InfoExtractor): 'formats': formats, 'description': description, } - - def _extract_description(self, mrss): - description = mrss.find('.//item/description') - return u''.join(t for t in description.itertext()) From a231ce87b56d85354f66d4a9b26763bc73ca86c1 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 18:35:11 +0200 Subject: [PATCH 007/340] [gameone] Added extraction of age_limit --- youtube_dl/extractor/gameone.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index aa0234346..3b3870878 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -27,6 +27,7 @@ class GameOneIE(InfoExtractor): 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', + 'age_limit': 16 } } @@ -37,6 +38,7 @@ class GameOneIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) description = self._html_search_meta('description', webpage) + age_limit = int(self._search_regex(r'age=(\d+)', self._html_search_meta('age-de-meta-label', webpage), 'age_limit', '0')) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') @@ -65,4 +67,5 @@ class GameOneIE(InfoExtractor): 'duration': duration, 'formats': formats, 'description': description, + 'age_limit': age_limit, } From 305d0683628d26c8e9ba04c77c4b3c7283106f80 Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Sat, 17 May 2014 19:04:02 +0200 Subject: [PATCH 008/340] [gameone] Added timestamp extraction --- youtube_dl/extractor/gameone.py | 14 +++++++++++--- youtube_dl/utils.py | 6 +++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 3b3870878..008eb90a5 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -2,10 +2,12 @@ from __future__ import unicode_literals import re -import xml.etree.ElementTree as ET from .common import InfoExtractor -from ..utils import xpath_with_ns +from ..utils import ( + xpath_with_ns, + parse_iso8601 +) NAMESPACE_MAP = { 'media': 'http://search.yahoo.com/mrss/', @@ -15,6 +17,8 @@ NAMESPACE_MAP = { # Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' +PUB_DATE_FORMAT = '%Y-%m-%d %H:%M:%S %z' + class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' _TEST = { @@ -27,7 +31,9 @@ class GameOneIE(InfoExtractor): 'duration': 1238, 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg', 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1', - 'age_limit': 16 + 'age_limit': 16, + 'upload_date': '20140513', + 'timestamp': 1399980122, } } @@ -44,6 +50,7 @@ class GameOneIE(InfoExtractor): mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') title = mrss.find('.//item/title').text thumbnail = mrss.find('.//item/image').get('url') + timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ') content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') @@ -68,4 +75,5 @@ class GameOneIE(InfoExtractor): 'formats': formats, 'description': description, 'age_limit': age_limit, + 'timestamp': timestamp, } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1036ea9bd..3e7947f5d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -765,7 +765,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): https_response = http_response -def parse_iso8601(date_str): +def parse_iso8601(date_str, delimiter='T'): """ Return a UNIX timestamp from the given date """ if date_str is None: @@ -785,8 +785,8 @@ def parse_iso8601(date_str): timezone = datetime.timedelta( hours=sign * int(m.group('hours')), minutes=sign * int(m.group('minutes'))) - - dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone + date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) + dt = datetime.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt.timetuple()) From 48fbb1003d901ef30654db8910b6f617efc49fb4 Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Mon, 19 May 2014 21:25:58 +1000 Subject: [PATCH 009/340] [adultswim] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/adultswim.py | 100 ++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 youtube_dl/extractor/adultswim.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..5f9ae7a3e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -1,5 +1,6 @@ from .academicearth import AcademicEarthCourseIE from .addanime import AddAnimeIE +from .adultswim import AdultSwimIE from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .aol import AolIE diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py new file mode 100644 index 000000000..e61916fde --- /dev/null +++ b/youtube_dl/extractor/adultswim.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class AdultSwimIE(InfoExtractor): + _VALID_URL = r'https?://video\.adultswim\.com/(?P.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$' + _TEST = { + 'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title', + 'md5': '4a90c63a07537ec9383175b330dfeab4', + 'info_dict': { + 'id': '8a250ba1450996e901453d7e9caf02f3', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + } + } + + _available_formats = ['150', '640', '3500'] + + _video_extensions = { + '3500': 'flv', + '640': 'mp4', + '150': 'mp4', + 'ipad': 'm3u8', + 'iphone': 'm3u8' + } + _video_dimensions = { + '3500': (1280, 720), + '640': (480, 270), + '150': (320, 180) + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_path = mobj.group('path') + + webpage = self._download_webpage(url, video_path) + episode_id = self._html_search_regex(r'', webpage, 'episode_id') + title = self._html_search_regex(r'', webpage, 'title') + + index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id + idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index') + + episode_el = idoc.find('.//episode') + show_title = episode_el.attrib.get('collectionTitle') + episode_title = episode_el.attrib.get('title') + thumbnail = episode_el.attrib.get('thumbnailUrl') + description = episode_el.find('./description').text.strip() + + entries = [] + segment_els = episode_el.findall('./segments/segment') + + for part_num, segment_el in enumerate(segment_els): + segment_id = segment_el.attrib.get('id') + segment_title = '%s %s part %d' % (show_title, episode_title, part_num + 1) + thumbnail = segment_el.attrib.get('thumbnailUrl') + duration = segment_el.attrib.get('duration') + + segment_url = 'http://asfix.adultswim.com/asfix-svc/episodeservices/getCvpPlaylist?networkName=AS&id=%s' % segment_id + idoc = self._download_xml(segment_url, segment_title, 'Downloading segment information', 'Unable to download segment information') + + formats = [] + file_els = idoc.findall('.//files/file') + + for file_el in file_els: + bitrate = file_el.attrib.get('bitrate') + type = file_el.attrib.get('type') + width, height = self._video_dimensions.get(bitrate, (None, None)) + formats.append({ + 'format_id': '%s-%s' % (bitrate, type), + 'url': file_el.text, + 'ext': self._video_extensions.get(bitrate, 'mp4'), + 'tbr': bitrate, + 'height': height, + 'width': width + }) + + self._sort_formats(formats) + + entries.append({ + 'id': segment_id, + 'title': segment_title, + 'formats': formats, + 'uploader': show_title, + 'thumbnail': thumbnail, + 'duration': duration, + 'description': description + }) + + return { + '_type': 'playlist', + 'id': episode_id, + 'display_id': video_path, + 'entries': entries, + 'title': '%s %s' % (show_title, episode_title), + 'description': description, + 'thumbnail': thumbnail + } From d415299a80c44fe97a0ef914311449ec0581a0cb Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Mon, 19 May 2014 22:32:45 +1000 Subject: [PATCH 010/340] [adultswim] Fix tests --- youtube_dl/extractor/adultswim.py | 52 +++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index e61916fde..ca1bfbdc2 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -9,12 +9,52 @@ class AdultSwimIE(InfoExtractor): _VALID_URL = r'https?://video\.adultswim\.com/(?P.+?)(?:\.html)?(?:\?.*)?(?:#.*)?$' _TEST = { 'url': 'http://video.adultswim.com/rick-and-morty/close-rick-counters-of-the-rick-kind.html?x=y#title', - 'md5': '4a90c63a07537ec9383175b330dfeab4', - 'info_dict': { - 'id': '8a250ba1450996e901453d7e9caf02f3', - 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind', - 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', - } + 'playlist': [ + { + 'md5': '4da359ec73b58df4575cd01a610ba5dc', + 'info_dict': { + 'id': '8a250ba1450996e901453d7f02ca02f5', + 'ext': 'flv', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 1', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + 'uploader': 'Rick and Morty', + 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' + } + }, + { + 'md5': 'ffbdf55af9331c509d95350bd0cc1819', + 'info_dict': { + 'id': '8a250ba1450996e901453d7f4bd102f6', + 'ext': 'flv', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 2', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + 'uploader': 'Rick and Morty', + 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' + } + }, + { + 'md5': 'b92409635540304280b4b6c36bd14a0a', + 'info_dict': { + 'id': '8a250ba1450996e901453d7fa73c02f7', + 'ext': 'flv', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 3', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + 'uploader': 'Rick and Morty', + 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' + } + }, + { + 'md5': 'e8818891d60e47b29cd89d7b0278156d', + 'info_dict': { + 'id': '8a250ba1450996e901453d7fc8ba02f8', + 'ext': 'flv', + 'title': 'Rick and Morty Close Rick-Counters of the Rick Kind part 4', + 'description': 'Rick has a run in with some old associates, resulting in a fallout with Morty. You got any chips, broh?', + 'uploader': 'Rick and Morty', + 'thumbnail': 'http://i.cdn.turner.com/asfix/repository/8a250ba13f865824013fc9db8b6b0400/thumbnail_267549017116827057.jpg' + } + } + ] } _available_formats = ['150', '640', '3500'] From 1d0668ed5a39b089b30b8e1e273c6b8a4f954eb2 Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Mon, 19 May 2014 23:28:21 +1000 Subject: [PATCH 011/340] [tenplay] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tenplay.py | 72 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/tenplay.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3503c76b7..a2c12fc8e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -265,6 +265,7 @@ from .sztvhu import SztvHuIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE +from .tenplay import TenPlayIE from .testurl import TestURLIE from .tf1 import TF1IE from .theplatform import ThePlatformIE diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py new file mode 100644 index 000000000..449351551 --- /dev/null +++ b/youtube_dl/extractor/tenplay.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class TenPlayIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+' + _TEST = { + 'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way', + 'md5': 'c9dda6aac8f814352ad2aee8899b1612', + 'info_dict': { + 'id': '2695695426001', + 'ext': 'flv', + 'title': 'TENplay: TV your way', + 'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.', + 'timestamp': 1380150606.889, + 'upload_date': '20130925', + 'uploader': 'TENplay' + } + } + + _video_fields = ["id","name","shortDescription","longDescription","creationDate","publishedDate","lastModifiedDate","customFields","videoStillURL","thumbnailURL","referenceId","length","playsTotal","playsTrailingWeek","renditions","captioning","startDate","endDate"] + + def _real_extract(self, url): + webpage = self._download_webpage(url, url) + video_id = self._html_search_regex(r'videoID: "(\d+?)"', webpage, 'video_id') + api_token = self._html_search_regex(r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') + title = self._html_search_regex(r'', webpage, 'title') + + json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title) + + formats = [] + for rendition in json['renditions']: + url = rendition['remoteUrl'] or rendition['url'] + protocol = 'rtmp' if url.startswith('rtmp') else 'http' + ext = 'flv' if protocol == 'rtmp' else rendition['videoContainer'].lower() + + if protocol == 'rtmp': + url = url.replace('&mp4:', '') + + formats.append({ + 'format_id': '_'.join(['rtmp', rendition['videoContainer'].lower(), rendition['videoCodec'].lower()]), + 'width': rendition['frameWidth'], + 'height': rendition['frameHeight'], + 'tbr': rendition['encodingRate'] / 1024, + 'filesize': rendition['size'], + 'protocol': protocol, + 'ext': ext, + 'vcodec': rendition['videoCodec'].lower(), + 'container': rendition['videoContainer'].lower(), + 'url': url + }) + + return { + 'id': video_id, + 'display_id': json['referenceId'], + 'title': json['name'], + 'description': json['shortDescription'] or json['longDescription'], + 'formats': formats, + 'thumbnails': [{ + 'url': json['videoStillURL'] + }, { + 'url': json['thumbnailURL'] + }], + 'thumbnail': json['videoStillURL'], + 'duration': json['length'] / 1000, + 'timestamp': float(json['creationDate']) / 1000, + 'uploader': json['customFields']['production_company_distributor'] if 'production_company_distributor' in json['customFields'] else 'TENplay', + 'view_count': json['playsTotal'] + } From e5c3a4b54995422dcef1d2fbb032446e35358e8d Mon Sep 17 00:00:00 2001 From: Tobias Bell Date: Mon, 19 May 2014 22:33:51 +0200 Subject: [PATCH 012/340] [gameone] Fix indentation and removed unused constants --- youtube_dl/extractor/gameone.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 008eb90a5..2544ea521 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -14,10 +14,10 @@ NAMESPACE_MAP = { } # URL prefix to download the mp4 files directly instead of streaming via rtmp -# Credits go to XBox-Maniac http://board.jdownloader.org/showpost.php?p=185835&postcount=31 +# Credits go to XBox-Maniac +# http://board.jdownloader.org/showpost.php?p=185835&postcount=31 RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' -PUB_DATE_FORMAT = '%Y-%m-%d %H:%M:%S %z' class GameOneIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P\d+)' @@ -44,7 +44,14 @@ class GameOneIE(InfoExtractor): webpage = self._download_webpage(url, video_id) og_video = self._og_search_video_url(webpage, secure=False) description = self._html_search_meta('description', webpage) - age_limit = int(self._search_regex(r'age=(\d+)', self._html_search_meta('age-de-meta-label', webpage), 'age_limit', '0')) + age_limit = int( + self._search_regex( + r'age=(\d+)', + self._html_search_meta( + 'age-de-meta-label', + webpage), + 'age_limit', + '0')) mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss') mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss') @@ -54,16 +61,19 @@ class GameOneIE(InfoExtractor): content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP)) content_url = content.get('url') - content = self._download_xml(content_url, video_id, 'Downloading media:content') + content = self._download_xml( + content_url, + video_id, + 'Downloading media:content') rendition_items = content.findall('.//rendition') duration = int(rendition_items[0].get('duration')) formats = [ - { - 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), - 'width': int(r.get('width')), - 'height': int(r.get('height')), - 'tbr': int(r.get('bitrate')), - } + { + 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text), + 'width': int(r.get('width')), + 'height': int(r.get('height')), + 'tbr': int(r.get('bitrate')), + } for r in rendition_items ] From 8ae980807abba5d49cd527193c1168701e66f421 Mon Sep 17 00:00:00 2001 From: "Simon W. Jackson" Date: Wed, 21 May 2014 16:35:49 +0200 Subject: [PATCH 013/340] Update test_age_restriction.py typo --- test/test_age_restriction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index c9cdb96cb..71e80b037 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -13,7 +13,7 @@ from youtube_dl import YoutubeDL def _download_restricted(url, filename, age): - """ Returns true iff the file has been downloaded """ + """ Returns true if the file has been downloaded """ params = { 'age_limit': age, From b702ecebf037390f5dee7a72228bc23aa67212e7 Mon Sep 17 00:00:00 2001 From: anovicecodemonkey Date: Wed, 28 May 2014 22:17:13 +0930 Subject: [PATCH 014/340] [UstreamIE] added support for "/embed/recorded/" style URLs (Fixes #2990) --- youtube_dl/extractor/ustream.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index e4bb3b949..eb2944573 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -11,7 +11,7 @@ from ..utils import ( class UstreamIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/(?Precorded|embed)/(?P\d+)' + _VALID_URL = r'https?://www\.ustream\.tv/(?Precorded|embed|embed/recorded)/(?P\d+)' IE_NAME = 'ustream' _TEST = { 'url': 'http://www.ustream.tv/recorded/20274954', @@ -25,6 +25,13 @@ class UstreamIE(InfoExtractor): def _real_extract(self, url): m = re.match(self._VALID_URL, url) + video_id = m.group('videoID') + + if m.group('type') == 'embed/recorded': # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990) + video_id = m.group('videoID') + webpage = self._download_webpage(url, video_id, note="Downloading embedded Ustream page") + desktop_url = 'http://www.ustream.tv/recorded/' + video_id + return self.url_result(desktop_url, 'Ustream') if m.group('type') == 'embed': video_id = m.group('videoID') webpage = self._download_webpage(url, video_id) @@ -32,8 +39,6 @@ class UstreamIE(InfoExtractor): desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id return self.url_result(desktop_url, 'Ustream') - video_id = m.group('videoID') - video_url = 'http://tcdn.ustream.tv/video/%s' % video_id webpage = self._download_webpage(url, video_id) From 5c6b1e578cfe0b2de2d52b026b04a032688df979 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 May 2014 20:20:11 +0700 Subject: [PATCH 015/340] [ustream] Remove unnecessary webpage download --- youtube_dl/extractor/ustream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index eb2944573..f326163a3 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -29,7 +29,6 @@ class UstreamIE(InfoExtractor): if m.group('type') == 'embed/recorded': # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990) video_id = m.group('videoID') - webpage = self._download_webpage(url, video_id, note="Downloading embedded Ustream page") desktop_url = 'http://www.ustream.tv/recorded/' + video_id return self.url_result(desktop_url, 'Ustream') if m.group('type') == 'embed': From 9e8753911cf37a4f8e26ce442e8938a7a52f3dad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 May 2014 20:22:36 +0700 Subject: [PATCH 016/340] [ustream] Modernize --- youtube_dl/extractor/ustream.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index f326163a3..488b10df9 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -15,11 +15,12 @@ class UstreamIE(InfoExtractor): IE_NAME = 'ustream' _TEST = { 'url': 'http://www.ustream.tv/recorded/20274954', - 'file': '20274954.flv', 'md5': '088f151799e8f572f84eb62f17d73e5c', 'info_dict': { - "uploader": "Young Americans for Liberty", - "title": "Young Americans for Liberty February 7, 2012 2:28 AM", + 'id': '20274954', + 'ext': 'flv', + 'uploader': 'Young Americans for Liberty', + 'title': 'Young Americans for Liberty February 7, 2012 2:28 AM', }, } @@ -27,14 +28,16 @@ class UstreamIE(InfoExtractor): m = re.match(self._VALID_URL, url) video_id = m.group('videoID') - if m.group('type') == 'embed/recorded': # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990) + # some sites use this embed format (see: http://github.com/rg3/youtube-dl/issues/2990) + if m.group('type') == 'embed/recorded': video_id = m.group('videoID') desktop_url = 'http://www.ustream.tv/recorded/' + video_id return self.url_result(desktop_url, 'Ustream') if m.group('type') == 'embed': video_id = m.group('videoID') webpage = self._download_webpage(url, video_id) - desktop_video_id = self._html_search_regex(r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id') + desktop_video_id = self._html_search_regex( + r'ContentVideoIds=\["([^"]*?)"\]', webpage, 'desktop_video_id') desktop_url = 'http://www.ustream.tv/recorded/' + desktop_video_id return self.url_result(desktop_url, 'Ustream') From 87fe568c28e4e59e6e72d292266f8f3a88a41814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 30 May 2014 00:38:57 +0200 Subject: [PATCH 017/340] [nbcnews] Add support for /feature/* pages (closes #3007) --- youtube_dl/extractor/nbc.py | 79 +++++++++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 1a63ab56a..aa34665d1 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import re +import json from .common import InfoExtractor from ..utils import find_xpath_attr, compat_str @@ -31,30 +32,68 @@ class NBCIE(InfoExtractor): class NBCNewsIE(InfoExtractor): - _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P\d+)' + _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ + ((video/.+?/(?P\d+))| + (feature/[^/]+/(?P.+))) + ''' - _TEST = { - 'url': 'http://www.nbcnews.com/video/nbc-news/52753292', - 'md5': '47abaac93c6eaf9ad37ee6c4463a5179', - 'info_dict': { - 'id': '52753292', - 'ext': 'flv', - 'title': 'Crew emerges after four-month Mars food study', - 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', + _TESTS = [ + { + 'url': 'http://www.nbcnews.com/video/nbc-news/52753292', + 'md5': '47abaac93c6eaf9ad37ee6c4463a5179', + 'info_dict': { + 'id': '52753292', + 'ext': 'flv', + 'title': 'Crew emerges after four-month Mars food study', + 'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1', + }, }, - } + { + 'url': 'http://www.nbcnews.com/feature/edward-snowden-interview/how-twitter-reacted-snowden-interview-n117236', + 'md5': 'b2421750c9f260783721d898f4c42063', + 'info_dict': { + 'id': 'I1wpAI_zmhsQ', + 'ext': 'flv', + 'title': 'How Twitter Reacted To The Snowden Interview', + 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', + }, + 'add_ie': ['ThePlatform'], + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) - info = all_info.find('video') + if video_id is not None: + all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id) + info = all_info.find('video') - return { - 'id': video_id, - 'title': info.find('headline').text, - 'ext': 'flv', - 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, - 'description': compat_str(info.find('caption').text), - 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, - } + return { + 'id': video_id, + 'title': info.find('headline').text, + 'ext': 'flv', + 'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text, + 'description': compat_str(info.find('caption').text), + 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, + } + else: + # "feature" pages use theplatform.com + title = mobj.group('title') + webpage = self._download_webpage(url, title) + bootstrap_json = self._search_regex( + r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', + flags=re.MULTILINE) + bootstrap = json.loads(bootstrap_json) + info = bootstrap['results'][0]['video'] + playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI' + mpxid = info['mpxId'] + all_videos = self._download_json(playlist_url, title)['videos'] + # The response contains additional videos + info = next(v for v in all_videos if v['mpxId'] == mpxid) + + return { + '_type': 'url', + # We get the best quality video + 'url': info['videoAssets'][-1]['publicUrl'], + 'ie_key': 'ThePlatform', + } From 63961d87a67dcc029ebc4dcf6e4bb23c01bb3df0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 30 May 2014 03:19:37 +0200 Subject: [PATCH 018/340] [devscripts/release] Do not commit CHANGELOG --- devscripts/release.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 2974a7c3e..453087e5f 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -45,9 +45,9 @@ fi /bin/echo -e "\n### Changing version in version.py..." sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py -/bin/echo -e "\n### Committing CHANGELOG README.md and youtube_dl/version.py..." +/bin/echo -e "\n### Committing README.md and youtube_dl/version.py..." make README.md -git add CHANGELOG README.md youtube_dl/version.py +git add README.md youtube_dl/version.py git commit -m "release $version" /bin/echo -e "\n### Now tagging, signing and pushing..." From 894e730911f051c57931282c3eacbc7b4dc230fe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 30 May 2014 03:19:51 +0200 Subject: [PATCH 019/340] release 2014.05.30 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 638ff8af5..159d7903a 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.19' +__version__ = '2014.05.30' From aae74e38327fbc913e2daf6360749c53959c1c84 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 30 May 2014 03:26:00 +0200 Subject: [PATCH 020/340] [Makefile] Remove CHANGELOG entry --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a82785861..c079761ef 100644 --- a/Makefile +++ b/Makefile @@ -77,6 +77,6 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash- --exclude 'docs/_build' \ -- \ bin devscripts test youtube_dl docs \ - CHANGELOG LICENSE README.md README.txt \ + LICENSE README.md README.txt \ Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion setup.py \ youtube-dl From 77fb72646f2bf32a7044ca356d38707dfacb7234 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 30 May 2014 03:26:03 +0200 Subject: [PATCH 021/340] release 2014.05.30.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 159d7903a..d3a40325f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.30' +__version__ = '2014.05.30.1' From 0f97c9a06f3c4f9eca0859b93809308fadf1d1b0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 30 May 2014 04:59:18 +0200 Subject: [PATCH 022/340] [ard] Fix title (#3006) --- youtube_dl/extractor/ard.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index b88f71bc4..a87b32b22 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -38,7 +38,9 @@ class ARDIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex( - r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', webpage, 'title') + [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', + r'<h4 class="headline">(.*?)</h4>'], + webpage, 'title') description = self._html_search_meta( 'dcterms.abstract', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) From 6ebb46c106fbcd42dba7c5cdba41b78309d13fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 30 May 2014 19:12:55 +0700 Subject: [PATCH 023/340] [ivi] Replace tests --- test/test_playlists.py | 16 ++++++++-------- youtube_dl/extractor/ivi.py | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 63d31db8c..057ce43f0 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -209,20 +209,20 @@ class TestPlaylists(unittest.TestCase): def test_ivi_compilation(self): dl = FakeYDL() ie = IviCompilationIE(dl) - result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel') + result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa') self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'dezhurnyi_angel') - self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012)') - self.assertTrue(len(result['entries']) >= 16) + self.assertEqual(result['id'], 'dvoe_iz_lartsa') + self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)') + self.assertTrue(len(result['entries']) >= 24) def test_ivi_compilation_season(self): dl = FakeYDL() ie = IviCompilationIE(dl) - result = ie.extract('http://www.ivi.ru/watch/dezhurnyi_angel/season1') + result = ie.extract('http://www.ivi.ru/watch/dvoe_iz_lartsa/season1') self.assertIsPlaylist(result) - self.assertEqual(result['id'], 'dezhurnyi_angel/season1') - self.assertEqual(result['title'], 'Дежурный ангел (2010 - 2012) 1 сезон') - self.assertTrue(len(result['entries']) >= 16) + self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1') + self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон') + self.assertTrue(len(result['entries']) >= 12) def test_imdb_list(self): dl = FakeYDL() diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 1ba4966c7..528be1524 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -33,14 +33,14 @@ class IviIE(InfoExtractor): }, # Serial's serie { - 'url': 'http://www.ivi.ru/watch/dezhurnyi_angel/74791', - 'md5': '3e6cc9a848c1d2ebcc6476444967baa9', + 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549', + 'md5': '221f56b35e3ed815fde2df71032f4b3e', 'info_dict': { - 'id': '74791', + 'id': '9549', 'ext': 'mp4', - 'title': 'Дежурный ангел - 1 серия', - 'duration': 2490, - 'thumbnail': 'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg', + 'title': 'Двое из ларца - Серия 1', + 'duration': 2655, + 'thumbnail': 'http://thumbs.ivi.ru/f15.vcp.digitalaccess.ru/contents/8/4/0068dc0677041f3336b7c2baad8fc0.jpg', }, 'skip': 'Only works from Russia', } From 211fd6c674f9294d3a1d22b0e7f519dcec0303e5 Mon Sep 17 00:00:00 2001 From: Georg Jaehnig <georg@jaehnig.org> Date: Fri, 30 May 2014 16:35:17 +0200 Subject: [PATCH 024/340] added spiegel.tv --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/spiegeltv.py | 73 +++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 youtube_dl/extractor/spiegeltv.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index def58f1d6..b689dc3c9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -258,6 +258,7 @@ from .southparkstudios import ( from .space import SpaceIE from .spankwire import SpankwireIE from .spiegel import SpiegelIE +from .spiegeltv import SpiegeltvIE from .spike import SpikeIE from .stanfordoc import StanfordOpenClassroomIE from .steam import SteamIE diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py new file mode 100644 index 000000000..acb85ee64 --- /dev/null +++ b/youtube_dl/extractor/spiegeltv.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json +import urllib +from .common import InfoExtractor + +class SpiegeltvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)' + _TEST = { + 'url': 'http://www.spiegel.tv/filme/flug-mh370/', + 'md5': '700d62dc485f3a81cf9d52144e5ead59', + 'info_dict': { + 'id': 'flug-mh370', + 'ext': 'm4v', + 'title': 'Flug MH370', + 'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + # TODO more code goes here, for example ... + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title') + + apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'; + + version_json_code = urllib.urlopen('%s/version.json' % apihost).read() + version_json = json.loads(version_json_code) + version_name = version_json['version_name'] + + slug_json_code = urllib.urlopen('%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id)).read() + slug_json = json.loads(slug_json_code) + oid = slug_json['object_id'] + + media_json_code = urllib.urlopen('%s/%s/restapi/media/%s.json' % (apihost, version_name, oid)).read() + media_json = json.loads(media_json_code) + + uuid = media_json['uuid'] + is_wide = media_json['is_wide'] + + server_json_code = urllib.urlopen('http://www.spiegel.tv/streaming_servers/').read() + server_json = json.loads(server_json_code) + server = server_json[0]['endpoint'] + + thumbnails = [] + for image in media_json['images']: + thumbnails.append({'url': image['url'], 'resolution': str(image['width']) + 'x' + str(image['height']) }) + + description = media_json['subtitle'] + duration = int(round(media_json['duration_in_ms'] / 1000)) + + if is_wide: + format = '16x9' + else: + format = '4x3' + + url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v' + + return_dict = { + 'id': video_id, + 'title': title, + 'url': url, + 'ext': 'm4v', + 'description': description, + 'duration': duration, + 'thumbnails': thumbnails + } + return return_dict From 4ffeca4ea29fe75821c8de5fbaf8d8f585f2dbb4 Mon Sep 17 00:00:00 2001 From: Georg Jaehnig <georg@jaehnig.org> Date: Fri, 30 May 2014 16:39:24 +0200 Subject: [PATCH 025/340] cleanup --- youtube_dl/extractor/spiegeltv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index acb85ee64..e8f49bc52 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -23,7 +23,6 @@ class SpiegeltvIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - # TODO more code goes here, for example ... webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title') From 0cdf576d86418273e604c43cc0d9a63a685bdc7c Mon Sep 17 00:00:00 2001 From: Georg Jaehnig <georg@jaehnig.org> Date: Fri, 30 May 2014 17:51:36 +0200 Subject: [PATCH 026/340] use provided function to get JSON --- youtube_dl/extractor/spiegeltv.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index e8f49bc52..f8265d042 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -2,8 +2,6 @@ from __future__ import unicode_literals import re -import json -import urllib from .common import InfoExtractor class SpiegeltvIE(InfoExtractor): @@ -28,22 +26,17 @@ class SpiegeltvIE(InfoExtractor): apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'; - version_json_code = urllib.urlopen('%s/version.json' % apihost).read() - version_json = json.loads(version_json_code) + version_json = self._download_json('%s/version.json' % apihost, None) version_name = version_json['version_name'] - slug_json_code = urllib.urlopen('%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id)).read() - slug_json = json.loads(slug_json_code) + slug_json = self._download_json('%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id), None) oid = slug_json['object_id'] - media_json_code = urllib.urlopen('%s/%s/restapi/media/%s.json' % (apihost, version_name, oid)).read() - media_json = json.loads(media_json_code) - + media_json = self._download_json('%s/%s/restapi/media/%s.json' % (apihost, version_name, oid), None) uuid = media_json['uuid'] is_wide = media_json['is_wide'] - server_json_code = urllib.urlopen('http://www.spiegel.tv/streaming_servers/').read() - server_json = json.loads(server_json_code) + server_json = self._download_json('http://www.spiegel.tv/streaming_servers/', None) server = server_json[0]['endpoint'] thumbnails = [] From 14470ac87b8ea345683f2a9ccebb44d6a776d760 Mon Sep 17 00:00:00 2001 From: Georg Jaehnig <georg@jaehnig.org> Date: Fri, 30 May 2014 17:56:13 +0200 Subject: [PATCH 027/340] tabs as spaces --- youtube_dl/extractor/spiegeltv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index f8265d042..7869859f9 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -8,7 +8,7 @@ class SpiegeltvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)' _TEST = { 'url': 'http://www.spiegel.tv/filme/flug-mh370/', - 'md5': '700d62dc485f3a81cf9d52144e5ead59', + 'md5': '700d62dc485f3a81cf9d52144e5ead59', 'info_dict': { 'id': 'flug-mh370', 'ext': 'm4v', From 6db80ad2db7fbe08515f3ce3a88abf91e52bd85e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 30 May 2014 20:59:15 +0200 Subject: [PATCH 028/340] [comedycentralshows] Transform the rtmp urls so that rtmpdump can download them (fixes #3010) From 'rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/*' to 'rtmpe://viacommtvstrmfs.fplive.net:1935/viacommtvstrm/gsp.comedystor/*' --- youtube_dl/extractor/comedycentral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 6e3a316c6..ba4d73ab8 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -188,7 +188,7 @@ class ComedyCentralShowsIE(InfoExtractor): }) formats.append({ 'format_id': 'rtmp-%s' % format, - 'url': rtmp_video_url, + 'url': rtmp_video_url.replace('viacomccstrm', 'viacommtvstrm'), 'ext': self._video_extensions.get(format, 'mp4'), 'height': h, 'width': w, From ed86f38a11191f3b3dc369d70b481b031dc094e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 30 May 2014 21:10:48 +0200 Subject: [PATCH 029/340] [theplatform] Use unicode_literals and _download_json --- youtube_dl/extractor/theplatform.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index f15780ef5..b6b2dba9c 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -18,17 +20,17 @@ class ThePlatformIE(InfoExtractor): _TEST = { # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ - u'url': u'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true', - u'info_dict': { - u'id': u'e9I_cZgTgIPd', - u'ext': u'flv', - u'title': u'Blackberry\'s big, bold Z30', - u'description': u'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.', - u'duration': 247, + 'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true', + 'info_dict': { + 'id': 'e9I_cZgTgIPd', + 'ext': 'flv', + 'title': 'Blackberry\'s big, bold Z30', + 'description': 'The Z30 is Blackberry\'s biggest, baddest mobile messaging device yet.', + 'duration': 247, }, - u'params': { + 'params': { # rtmp download - u'skip_download': True, + 'skip_download': True, }, } @@ -39,7 +41,7 @@ class ThePlatformIE(InfoExtractor): error_msg = next( n.attrib['abstract'] for n in meta.findall(_x('.//smil:ref')) - if n.attrib.get('title') == u'Geographic Restriction') + if n.attrib.get('title') == 'Geographic Restriction') except StopIteration: pass else: @@ -101,8 +103,7 @@ class ThePlatformIE(InfoExtractor): config_url = url+ '&form=json' config_url = config_url.replace('swf/', 'config/') config_url = config_url.replace('onsite/', 'onsite/config/') - config_json = self._download_webpage(config_url, video_id, u'Downloading config') - config = json.loads(config_json) + config = self._download_json(config_url, video_id, 'Downloading config') smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' else: smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' From 236d0cd07c4f9e7d9d9367da1c139b850391540f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 31 May 2014 17:45:00 +0700 Subject: [PATCH 030/340] [nrktv] Recognize tv.nrksuper.no URL --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index f5117d7b3..3a6a7883e 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -72,7 +72,7 @@ class NRKIE(InfoExtractor): class NRKTVIE(InfoExtractor): - _VALID_URL = r'http://tv\.nrk\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})' + _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})' _TESTS = [ { From 386ba39cac2f5ab614366d17c32e69d1cfb644a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 31 May 2014 14:40:05 +0200 Subject: [PATCH 031/340] [fc2] Encode the string used for the md5 checksum In python 3 it must be a bytes object. --- youtube_dl/extractor/fc2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index ca8993241..fe1cf052c 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -36,7 +36,7 @@ class FC2IE(InfoExtractor): thumbnail = self._og_search_thumbnail(webpage) refer = url.replace('/content/', '/a/content/') - mimi = hashlib.md5(video_id + '_gGddgPfeaf_gzyr').hexdigest() + mimi = hashlib.md5((video_id + '_gGddgPfeaf_gzyr').encode('utf-8')).hexdigest() info_url = ( "http://video.fc2.com/ginfo.php?mimi={1:s}&href={2:s}&v={0:s}&fversion=WIN%2011%2C6%2C602%2C180&from=2&otag=0&upid={0:s}&tk=null&". From 7e8fdb1aaeced1caf3c8c5365b32d4535d1d434e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 31 May 2014 14:45:46 +0200 Subject: [PATCH 032/340] [fc2] Recognize urls without language part (reported in #1154) --- youtube_dl/extractor/fc2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index fe1cf052c..18f91efac 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -13,7 +13,7 @@ from ..utils import ( class FC2IE(InfoExtractor): - _VALID_URL = r'^http://video\.fc2\.com/(?P<lang>[^/]+)/content/(?P<id>[^/]+)' + _VALID_URL = r'^http://video\.fc2\.com/((?P<lang>[^/]+)/)?content/(?P<id>[^/]+)' IE_NAME = 'fc2' _TEST = { 'url': 'http://video.fc2.com/en/content/20121103kUan1KHs', From 9168308579db01dbd65131d963f5721df9106e1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 31 May 2014 17:55:03 +0200 Subject: [PATCH 033/340] [vevo] The title in the url is optional (fixes #3020) --- youtube_dl/extractor/vevo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index ea34a8f16..eada13ce9 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -16,7 +16,7 @@ class VevoIE(InfoExtractor): (currently used by MTVIE) """ _VALID_URL = r'''(?x) - (?:https?://www\.vevo\.com/watch/(?:[^/]+/[^/]+/)?| + (?:https?://www\.vevo\.com/watch/(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| vevo:) From 9c7b79acd95471c7fd26ada35ca5213ab6739505 Mon Sep 17 00:00:00 2001 From: MikeCol <MikeCol@gmx.net> Date: Sat, 31 May 2014 18:31:39 +0200 Subject: [PATCH 034/340] title extraction condition less restrictive --- youtube_dl/extractor/extremetube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dl/extractor/extremetube.py index ff7c0cd3e..14a196ffc 100644 --- a/youtube_dl/extractor/extremetube.py +++ b/youtube_dl/extractor/extremetube.py @@ -37,7 +37,7 @@ class ExtremeTubeIE(InfoExtractor): webpage = self._download_webpage(req, video_id) video_title = self._html_search_regex( - r'<h1 [^>]*?title="([^"]+)"[^>]*>\1<', webpage, 'title') + r'<h1 [^>]*?title="([^"]+)"[^>]*>', webpage, 'title') uploader = self._html_search_regex( r'>Posted by:(?=<)(?:\s|<[^>]*>)*(.+?)\|', webpage, 'uploader', fatal=False) From 1f6b8f311543d8033d28c71f0a744fa2dd93f975 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 31 May 2014 20:28:03 +0200 Subject: [PATCH 035/340] release 2014.05.31 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d3a40325f..07092db96 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.30.1' +__version__ = '2014.05.31' From dc31942f4204a22f5cc3fc074c73e7fe0bb7bd19 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 31 May 2014 20:29:53 +0200 Subject: [PATCH 036/340] release 2014.05.31.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 07092db96..ee6d31236 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.31' +__version__ = '2014.05.31.1' From 9b7c4fd9813d94f7d099540213c0cee7635eaf87 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 31 May 2014 20:35:12 +0200 Subject: [PATCH 037/340] release 2014.05.31.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ee6d31236..a784169b9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.31.1' +__version__ = '2014.05.31.2' From 680301685855df3d8b5aaa51d0c101426263696f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 31 May 2014 20:40:48 +0200 Subject: [PATCH 038/340] release 2014.05.31.3 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a784169b9..872ef0d84 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.31.2' +__version__ = '2014.05.31.3' From 1a2f2e1e66e20837a16699c27bfc45c7ef6a3f63 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 31 May 2014 20:45:24 +0200 Subject: [PATCH 039/340] release 2014.05.31.4 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 872ef0d84..67937bbc1 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.31.3' +__version__ = '2014.05.31.4' From ceb7a17f34b13419a29e5915c2aa1435aa4bbe2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 1 Jun 2014 14:38:36 +0700 Subject: [PATCH 040/340] [mailru] Add support for new mail.ru URL format (Closes #3024) --- youtube_dl/extractor/mailru.py | 55 +++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py index 5016989cc..7460d81cd 100644 --- a/youtube_dl/extractor/mailru.py +++ b/youtube_dl/extractor/mailru.py @@ -9,29 +9,48 @@ from .common import InfoExtractor class MailRuIE(InfoExtractor): IE_NAME = 'mailru' IE_DESC = 'Видео@Mail.Ru' - _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/video/.*#video=/?(?P<id>[^/]+/[^/]+/[^/]+/\d+)' + _VALID_URL = r'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' - _TEST = { - 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', - 'md5': 'dea205f03120046894db4ebb6159879a', - 'info_dict': { - 'id': '46301138', - 'ext': 'mp4', - 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', - 'timestamp': 1393232740, - 'upload_date': '20140224', - 'uploader': 'sonypicturesrus', - 'uploader_id': 'sonypicturesrus@mail.ru', - 'duration': 184, - } - } + _TESTS = [ + { + 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', + 'md5': 'dea205f03120046894db4ebb6159879a', + 'info_dict': { + 'id': '46301138', + 'ext': 'mp4', + 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', + 'timestamp': 1393232740, + 'upload_date': '20140224', + 'uploader': 'sonypicturesrus', + 'uploader_id': 'sonypicturesrus@mail.ru', + 'duration': 184, + }, + }, + { + 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html', + 'md5': '00a91a58c3402204dcced523777b475f', + 'info_dict': { + 'id': '46843144', + 'ext': 'mp4', + 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', + 'timestamp': 1397217632, + 'upload_date': '20140411', + 'uploader': 'hitech', + 'uploader_id': 'hitech@corp.mail.ru', + 'duration': 245, + }, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = mobj.group('idv1') + + if not video_id: + video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') video_data = self._download_json( - 'http://videoapi.my.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') + 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') author = video_data['author'] uploader = author['name'] @@ -40,6 +59,8 @@ class MailRuIE(InfoExtractor): movie = video_data['movie'] content_id = str(movie['contentId']) title = movie['title'] + if title.endswith('.mp4'): + title = title[:-4] thumbnail = movie['poster'] duration = movie['duration'] From b7e8b6e37ab861886f38a923f9578a21122e5d5f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 2 Jun 2014 10:47:24 +0200 Subject: [PATCH 041/340] release 2014.06.02 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 67937bbc1..3c3c4e777 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.05.31.4' +__version__ = '2014.06.02' From 263bd4ec50f9c5ac44062dfbd2f7579177de74e6 Mon Sep 17 00:00:00 2001 From: Anton Novosyolov <anton.novosyolov@gmail.com> Date: Mon, 2 Jun 2014 13:30:23 +0400 Subject: [PATCH 042/340] Recognize a third format of the upload_date in the 'watch-uploader-info' element --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 981ca62c0..8327fb146 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1140,7 +1140,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): mobj = re.search(r'(?s)id="eow-date.*?>(.*?)</span>', video_webpage) if mobj is None: mobj = re.search( - r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded) on (.*?)</strong>', + r'(?s)id="watch-uploader-info".*?>.*?(?:Published|Uploaded|Streamed live) on (.*?)</strong>', video_webpage) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) From db23d8d2a24b648efe1f4314f1687e4484f0dc8a Mon Sep 17 00:00:00 2001 From: Georg Jaehnig <georg@jaehnig.org> Date: Tue, 3 Jun 2014 16:50:54 +0200 Subject: [PATCH 043/340] [Spiegeltv] skip rtmp download to pass Travis test build --- youtube_dl/extractor/spiegeltv.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index 7869859f9..303c11044 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -14,6 +14,10 @@ class SpiegeltvIE(InfoExtractor): 'ext': 'm4v', 'title': 'Flug MH370', 'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines', + }, + 'params': { + # rtmp download + 'skip_download': True, } } From df53a98f2ba7d628d66189ea2733271a1da5a6ad Mon Sep 17 00:00:00 2001 From: Georg Jaehnig <georg@jaehnig.org> Date: Tue, 3 Jun 2014 17:52:39 +0200 Subject: [PATCH 044/340] [Spiegeltv] remove the md5 field to pass Travis test build --- youtube_dl/extractor/spiegeltv.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index 303c11044..ffd554633 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -8,7 +8,6 @@ class SpiegeltvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)' _TEST = { 'url': 'http://www.spiegel.tv/filme/flug-mh370/', - 'md5': '700d62dc485f3a81cf9d52144e5ead59', 'info_dict': { 'id': 'flug-mh370', 'ext': 'm4v', From 6a3fa81ffba194c9fdcb30b61f72935d63f30a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Tue, 3 Jun 2014 21:56:49 +0200 Subject: [PATCH 045/340] [ard] Fix format extraction (fixes #3006 and #3032) --- youtube_dl/extractor/ard.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index a87b32b22..c6d22c029 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -39,16 +39,18 @@ class ARDIE(InfoExtractor): title = self._html_search_regex( [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', + r'<meta name="dcterms.title" content="(.*?)"/>', r'<h4 class="headline">(.*?)</h4>'], webpage, 'title') description = self._html_search_meta( 'dcterms.abstract', webpage, 'description') thumbnail = self._og_search_thumbnail(webpage) - streams = [ - mo.groupdict() - for mo in re.finditer( - r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)] + + media_info = self._download_json( + 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) + # The second element of the _mediaArray contains the standard http urls + streams = media_info['_mediaArray'][1]['_mediaStreamArray'] if not streams: if '"fsk"' in webpage: raise ExtractorError('This video is only available after 20:00') @@ -56,21 +58,12 @@ class ARDIE(InfoExtractor): formats = [] for s in streams: format = { - 'quality': int(s['quality']), + 'quality': s['_quality'], + 'url': s['_stream'], } - if s.get('rtmp_url'): - format['protocol'] = 'rtmp' - format['url'] = s['rtmp_url'] - format['playpath'] = s['video_url'] - else: - format['url'] = s['video_url'] - quality_name = self._search_regex( - r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'], - 'quality name', default='NA') - format['format_id'] = '%s-%s-%s-%s' % ( - determine_ext(format['url']), quality_name, s['media_type'], - s['quality']) + format['format_id'] = '%s-%s' % ( + determine_ext(format['url']), format['quality']) formats.append(format) From b675b32e6b7584c94e22786f1c9b33df258a1912 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 4 Jun 2014 06:47:57 +0200 Subject: [PATCH 046/340] release 2014.06.04 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 3c3c4e777..47dde62b9 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.02' +__version__ = '2014.06.04' From 6340716b3ac75384eecf48025c71380949883b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 4 Jun 2014 20:11:23 +0700 Subject: [PATCH 047/340] [yahoo] Make thumbnail optional (Closes #3043) --- youtube_dl/extractor/yahoo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 393f6ffbe..c95c59db8 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -113,7 +113,7 @@ class YahooIE(InfoExtractor): 'title': meta['title'], 'formats': formats, 'description': clean_html(meta['description']), - 'thumbnail': meta['thumbnail'], + 'thumbnail': meta.get('thumbnail'), } From fb6a5b965b9c2ba1c263036d6b353d5ff0a80b05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 4 Jun 2014 20:13:36 +0700 Subject: [PATCH 048/340] [yahoo] Improve content id extraction --- youtube_dl/extractor/yahoo.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index c95c59db8..181522494 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -21,7 +21,7 @@ class YahooIE(InfoExtractor): 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 'md5': '4962b075c08be8690a922ee026d05e69', 'info_dict': { - 'id': '214727115', + 'id': '2d25e626-2378-391f-ada0-ddaf1417e588', 'ext': 'mp4', 'title': 'Julian Smith & Travis Legg Watch Julian Smith', 'description': 'Julian and Travis watch Julian Smith', @@ -31,7 +31,7 @@ class YahooIE(InfoExtractor): 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html', 'md5': 'd6e6fc6e1313c608f316ddad7b82b306', 'info_dict': { - 'id': '103000935', + 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9', 'ext': 'mp4', 'title': 'Codefellas - The Cougar Lies with Spanish Moss', 'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?', @@ -58,9 +58,11 @@ class YahooIE(InfoExtractor): r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE, default=None) if items_json is None: - long_id = self._search_regex( + CONTENT_ID_REGEXES = [ r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"', - webpage, 'content ID') + r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"' + ] + long_id = self._search_regex(CONTENT_ID_REGEXES, webpage, 'content ID') video_id = long_id else: items = json.loads(items_json) From dede691aca17d7a46720e0ae5084e3284758268a Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Wed, 4 Jun 2014 17:38:41 +0200 Subject: [PATCH 049/340] [yahoo] improve thumbnail extraction --- youtube_dl/extractor/yahoo.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py index 181522494..d84be2562 100644 --- a/youtube_dl/extractor/yahoo.py +++ b/youtube_dl/extractor/yahoo.py @@ -70,9 +70,9 @@ class YahooIE(InfoExtractor): # The 'meta' field is not always in the video webpage, we request it # from another page long_id = info['id'] - return self._get_info(long_id, video_id) + return self._get_info(long_id, video_id, webpage) - def _get_info(self, long_id, video_id): + def _get_info(self, long_id, video_id, webpage): query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"' ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"' ' AND protocol="http"' % long_id) @@ -115,7 +115,7 @@ class YahooIE(InfoExtractor): 'title': meta['title'], 'formats': formats, 'description': clean_html(meta['description']), - 'thumbnail': meta.get('thumbnail'), + 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage), } @@ -139,7 +139,7 @@ class YahooNewsIE(YahooIE): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) long_id = self._search_regex(r'contentId: \'(.+?)\',', webpage, 'long id') - return self._get_info(long_id, video_id) + return self._get_info(long_id, video_id, webpage) class YahooSearchIE(SearchInfoExtractor): From 0e3ae92441e33f9fda2b92aefefd2aad27b9837a Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Thu, 5 Jun 2014 18:48:03 +0200 Subject: [PATCH 050/340] [TagesschauIE] Add extractor for tagesschau.de (fixes #3049) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/tagesschau.py | 81 ++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 youtube_dl/extractor/tagesschau.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index def58f1d6..4a640ab9c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -266,6 +266,7 @@ from .streamcz import StreamCZIE from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE +from .tagesschau import TagesschauIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py new file mode 100644 index 000000000..ffcf1c8c4 --- /dev/null +++ b/youtube_dl/extractor/tagesschau.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor + + +class TagesschauIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?\d+)\.html' + + _TESTS = [{ + 'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html', + 'md5': 'bcdeac2194fb296d599ce7929dfa4009', + 'info_dict': { + 'id': '1399128', + 'ext': 'mp4', + 'title': 'Harald Range, Generalbundesanwalt, zu den Ermittlungen', + 'description': 'md5:69da3c61275b426426d711bde96463ab', + 'thumbnail': 're:^http:.*\.jpg$', + }, + }, { + 'url': 'http://www.tagesschau.de/multimedia/video/video-196.html', + 'md5': '8aaa8bf3ae1ca2652309718c03019128', + 'info_dict': { + 'id': '196', + 'ext': 'mp4', + 'title': 'Ukraine-Konflikt: Klitschko in Kiew als B\xfcrgermeister vereidigt', + 'description': 'md5:f22e4af75821d174fa6c977349682691', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + if video_id.startswith('-'): + display_id = video_id.strip('-') + else: + display_id = video_id + + webpage = self._download_webpage(url, display_id) + + playerpage = self._download_webpage( + 'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, display_id) + + medias = re.findall(r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', playerpage) + + formats = [] + for url, ext, res in medias: + + if res == 's': + res = 'small' + quality = 0 + elif res == 'm': + res = 'medium' + quality = 1 + elif res == 'l': + res = 'large' + quality = 2 + + formats.append({ + 'format_id': res+'_'+ext, + 'url': url, + 'quality': quality, + 'ext': ext, + }) + + self._sort_formats(formats) + + thumbnail = re.findall(r'"(/multimedia/.+?\.jpg)"', playerpage)[-1] + + return { + 'id': display_id, + 'title': self._og_search_title(webpage).strip(), + 'thumbnail': 'http://www.tagesschau.de'+thumbnail, + 'formats': formats, + 'description': self._og_search_description(webpage).strip(), + } From 7ffad0af5ab392ed5ea7ec9165fd42d012a669f5 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Thu, 5 Jun 2014 18:49:34 +0200 Subject: [PATCH 051/340] [TagesschauIE] Remove unused import --- youtube_dl/extractor/tagesschau.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index ffcf1c8c4..bd935063f 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor From 6a15923b77dc610b85810a15ce8a76f6688ed5e9 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Thu, 5 Jun 2014 19:34:30 +0200 Subject: [PATCH 052/340] [TagesschauIE] Add note to 2nd _download_webpage --- youtube_dl/extractor/tagesschau.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index bd935063f..fec1ff67e 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -43,7 +43,7 @@ class TagesschauIE(InfoExtractor): webpage = self._download_webpage(url, display_id) playerpage = self._download_webpage( - 'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, display_id) + 'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, display_id, 'Downloading player page') medias = re.findall(r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', playerpage) From 70e322695db9d67ba3ab35bf660d92be5e7e55f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 5 Jun 2014 21:21:15 +0200 Subject: [PATCH 053/340] [youtube:playlist] Fix mixes extraction (fixes #3051) The username seems to be empty now. --- youtube_dl/extractor/youtube.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8327fb146..2c51a0b47 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1414,11 +1414,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): title_span = (search_title('playlist-title') or search_title('title long-title') or search_title('title')) title = clean_html(title_span) - video_re = r'''(?x)data-video-username="(.*?)".*? + video_re = r'''(?x)data-video-username=".*?".*? href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id) - matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) - # Some of the videos may have been deleted, their username field is empty - ids = [video_id for (username, video_id) in matches if username] + ids = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL)) url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, title) From a45e6aadd7c8aa42d769b9c0ee0c7d29258efcf1 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Fri, 6 Jun 2014 09:00:28 +0200 Subject: [PATCH 054/340] [TagesschauIE] Fix possible error if quality is not defined --- youtube_dl/extractor/tagesschau.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index fec1ff67e..e2ad7c393 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -59,6 +59,8 @@ class TagesschauIE(InfoExtractor): elif res == 'l': res = 'large' quality = 2 + else: + quality = 0 formats.append({ 'format_id': res+'_'+ext, From b4e7447458770a705c064d0eb7f73f49dc885557 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Fri, 6 Jun 2014 11:21:59 +0200 Subject: [PATCH 055/340] [TeacherTubeIE] Add extractor for teachertube.com videos + classrooms (fixes #3046) --- test/test_playlists.py | 9 +++ youtube_dl/extractor/__init__.py | 4 ++ youtube_dl/extractor/teachertube.py | 85 +++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 youtube_dl/extractor/teachertube.py diff --git a/test/test_playlists.py b/test/test_playlists.py index 057ce43f0..465b07b9e 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,6 +28,7 @@ from youtube_dl.extractor import ( SoundcloudSetIE, SoundcloudUserIE, SoundcloudPlaylistIE, + TeacherTubeClassroomIE, LivestreamIE, NHLVideocenterIE, BambuserChannelIE, @@ -360,5 +361,13 @@ class TestPlaylists(unittest.TestCase): result['title'], 'Brace Yourself - Today\'s Weirdest News') self.assertTrue(len(result['entries']) >= 10) + def test_TeacherTubeClassroom(self): + dl = FakeYDL() + ie = TeacherTubeClassroomIE(dl) + result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'rbhagwati2') + self.assertTrue(len(result['entries']) >= 20) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index def58f1d6..2ad1db555 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -266,6 +266,10 @@ from .streamcz import StreamCZIE from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE +from .teachertube import ( + TeacherTubeIE, + TeacherTubeClassroomIE, +) from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py new file mode 100644 index 000000000..4740f3d56 --- /dev/null +++ b/youtube_dl/extractor/teachertube.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class TeacherTubeIE(InfoExtractor): + IE_NAME = 'teachertube' + IE_DESC = 'teachertube.com videos' + + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/viewVideo\.php\?video_id=(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', + 'md5': 'f9434ef992fd65936d72999951ee254c', + 'info_dict': { + 'id': '339997', + 'ext': 'mp4', + 'title': 'Measures of dispersion from a frequency table_x264', + 'description': 'md5:a3e9853487185e9fcd7181a07164650b', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.teachertube.com/viewVideo.php?video_id=340064', + 'md5': '0d625ec6bc9bf50f70170942ad580676', + 'info_dict': { + 'id': '340064', + 'ext': 'mp4', + 'title': 'How to Make Paper Dolls _ Paper Art Projects', + 'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + url = self._html_search_meta('twitter:player:stream', webpage, 'twitter player') + + formats = [{ + 'format_id': 'flv', + 'url': url.replace('mp4v', 'flv').replace('.mp4', '.flv'), + 'quality': 0, + 'ext': 'flv', + }, { + 'format_id': 'mp4', + 'url': url, + 'quality': 1, + 'ext': 'mp4', + }] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + } + + +class TeacherTubeClassroomIE(InfoExtractor): + IE_NAME = 'teachertube:classroom' + IE_DESC = 'teachertube.com online classrooms' + + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user_id = mobj.group('user') + + rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, + user_id, 'Downloading classroom RSS') + + entries = [] + for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'): + entries.append(self.url_result(url.attrib['url'], 'TeacherTube')) + + return self.playlist_result(entries, user_id) From 087ca2cb07be342bed18e4d0630a4089249b931f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 6 Jun 2014 14:55:19 +0200 Subject: [PATCH 056/340] [naver] Add rtmp formats (fixes #3054) --- youtube_dl/downloader/rtmp.py | 3 +++ youtube_dl/extractor/naver.py | 13 +++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 78b1e7cd2..cc6a84106 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -96,6 +96,7 @@ class RtmpFD(FileDownloader): flash_version = info_dict.get('flash_version', None) live = info_dict.get('rtmp_live', False) conn = info_dict.get('rtmp_conn', None) + protocol = info_dict.get('rtmp_protocol', None) self.report_destination(filename) tmpfilename = self.temp_name(filename) @@ -133,6 +134,8 @@ class RtmpFD(FileDownloader): basic_args += ['--conn', entry] elif isinstance(conn, compat_str): basic_args += ['--conn', conn] + if protocol is not None: + basic_args += ['--protocol', protocol] args = basic_args + [[], ['--resume', '--skip', '1']][not live and self.params.get('continuedl', False)] if sys.platform == 'win32' and sys.version_info < (3, 0): diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 4cab30631..160b3f00f 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -47,14 +47,19 @@ class NaverIE(InfoExtractor): formats = [] for format_el in urls.findall('EncodingOptions/EncodingOption'): domain = format_el.find('Domain').text - if domain.startswith('rtmp'): - continue - formats.append({ + f = { 'url': domain + format_el.find('uri').text, 'ext': 'mp4', 'width': int(format_el.find('width').text), 'height': int(format_el.find('height').text), - }) + } + if domain.startswith('rtmp'): + f.update({ + 'ext': 'flv', + 'rtmp_protocol': '1', # rtmpt + }) + formats.append(f) + self._sort_formats(formats) return { 'id': video_id, From 24da5893fc934207990158f81a90816cdc526c1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 6 Jun 2014 14:57:37 +0200 Subject: [PATCH 057/340] [naver] Modernize --- youtube_dl/extractor/naver.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py index 160b3f00f..c0231c197 100644 --- a/youtube_dl/extractor/naver.py +++ b/youtube_dl/extractor/naver.py @@ -1,4 +1,6 @@ # encoding: utf-8 +from __future__ import unicode_literals + import re from .common import InfoExtractor @@ -12,12 +14,13 @@ class NaverIE(InfoExtractor): _VALID_URL = r'https?://(?:m\.)?tvcast\.naver\.com/v/(?P<id>\d+)' _TEST = { - u'url': u'http://tvcast.naver.com/v/81652', - u'file': u'81652.mp4', - u'info_dict': { - u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', - u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', - u'upload_date': u'20130903', + 'url': 'http://tvcast.naver.com/v/81652', + 'info_dict': { + 'id': '81652', + 'ext': 'mp4', + 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번', + 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.', + 'upload_date': '20130903', }, } @@ -28,7 +31,7 @@ class NaverIE(InfoExtractor): m_id = re.search(r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"', webpage) if m_id is None: - raise ExtractorError(u'couldn\'t extract vid and key') + raise ExtractorError('couldn\'t extract vid and key') vid = m_id.group(1) key = m_id.group(2) query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key,}) @@ -39,10 +42,10 @@ class NaverIE(InfoExtractor): }) info = self._download_xml( 'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query, - video_id, u'Downloading video info') + video_id, 'Downloading video info') urls = self._download_xml( 'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls, - video_id, u'Downloading video formats info') + video_id, 'Downloading video formats info') formats = [] for format_el in urls.findall('EncodingOptions/EncodingOption'): From 3217377b3c1e0b0b67f85ce8ec808f6c5cf0c5ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Jun 2014 21:15:06 +0700 Subject: [PATCH 058/340] [xvideos] Capture and output inline error if any --- youtube_dl/extractor/xvideos.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index 85e99e1b0..f21e0671f 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -5,6 +5,8 @@ import re from .common import InfoExtractor from ..utils import ( compat_urllib_parse, + ExtractorError, + clean_html, ) @@ -28,6 +30,10 @@ class XVideosIE(InfoExtractor): self.report_extraction(video_id) + mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage) + if mobj: + raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) + # Extract video URL video_url = compat_urllib_parse.unquote( self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) From a6ffb92f0ba9b64b82a56bcf8417744ff3e829f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 6 Jun 2014 21:23:36 +0700 Subject: [PATCH 059/340] [xvideos] Replace test --- youtube_dl/extractor/xvideos.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index f21e0671f..7e0044824 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -13,12 +13,13 @@ from ..utils import ( class XVideosIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' _TEST = { - 'url': 'http://www.xvideos.com/video939581/funny_porns_by_s_-1', - 'file': '939581.flv', - 'md5': '1d0c835822f0a71a7bf011855db929d0', + 'url': 'http://www.xvideos.com/video4588838/biker_takes_his_girl', + 'md5': '4b46ae6ea5e6e9086e714d883313c0c9', 'info_dict': { - "title": "Funny Porns By >>>>S<<<<<< -1", - "age_limit": 18, + 'id': '4588838', + 'ext': 'flv', + 'title': 'Biker Takes his Girl', + 'age_limit': 18, } } From 566bd96da83ff44d1e3e4285511ee17535da26e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 7 Jun 2014 13:09:21 +0200 Subject: [PATCH 060/340] [teachingchannel] Add extractor (closes #3048) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/teachingchannel.py | 33 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 youtube_dl/extractor/teachingchannel.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index def58f1d6..1fba56405 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -266,6 +266,7 @@ from .streamcz import StreamCZIE from .swrmediathek import SWRMediathekIE from .syfy import SyfyIE from .sztvhu import SztvHuIE +from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE from .techtalks import TechTalksIE from .ted import TEDIE diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py new file mode 100644 index 000000000..117afa9bf --- /dev/null +++ b/youtube_dl/extractor/teachingchannel.py @@ -0,0 +1,33 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .ooyala import OoyalaIE + + +class TeachingChannelIE(InfoExtractor): + _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)' + + _TEST = { + 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', + 'info_dict': { + 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', + 'ext': 'mp4', + 'title': 'A History of Teaming', + 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + ooyala_code = self._search_regex( + r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code') + + return OoyalaIE._build_url_result(ooyala_code) From 24577db24145320750c0dc235354dab16bca5507 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 7 Jun 2014 13:43:27 +0200 Subject: [PATCH 061/340] [test/test_youtube_lists] Replace mix list The old video doesn't have a mix anymore. --- test/test_youtube_lists.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 7d3b9c705..3aadedd64 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -112,11 +112,11 @@ class TestYoutubeLists(unittest.TestCase): def test_youtube_mix(self): dl = FakeYDL() ie = YoutubePlaylistIE(dl) - result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y') + result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') entries = result['entries'] self.assertTrue(len(entries) >= 20) original_video = entries[0] - self.assertEqual(original_video['id'], 'rjFaenf1T-Y') + self.assertEqual(original_video['id'], 'OQpdSVF_k_w') def test_youtube_toptracks(self): print('Skipping: The playlist page gives error 500') From 3c80377b693f8b5fc7afa9a24e03af6fe336177d Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Sat, 7 Jun 2014 14:31:10 +0200 Subject: [PATCH 062/340] [Youtube] Add format code 271 (1440p webm) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2c51a0b47..7c50881c4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -223,6 +223,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50}, From 4a5b4d34dc1db95871c6eca61d0fd3edb711743c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 15:14:20 +0200 Subject: [PATCH 063/340] [tagesschau] Add support for width/height --- youtube_dl/extractor/tagesschau.py | 43 ++++++++++++++---------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index e2ad7c393..36331529e 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class TagesschauIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/multimedia/video/video(?P<id>-?[0-9]+)\.html' _TESTS = [{ 'url': 'http://www.tagesschau.de/multimedia/video/video1399128.html', @@ -25,11 +25,17 @@ class TagesschauIE(InfoExtractor): 'info_dict': { 'id': '196', 'ext': 'mp4', - 'title': 'Ukraine-Konflikt: Klitschko in Kiew als B\xfcrgermeister vereidigt', + 'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt', 'description': 'md5:f22e4af75821d174fa6c977349682691', 'thumbnail': 're:http://.*\.jpg', }, - }] + }] + + _FORMATS = { + 's': {'width': 256, 'height': 144, 'quality': 1}, + 'm': {'width': 512, 'height': 288, 'quality': 2}, + 'l': {'width': 960, 'height': 544, 'quality': 3}, + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -43,31 +49,22 @@ class TagesschauIE(InfoExtractor): webpage = self._download_webpage(url, display_id) playerpage = self._download_webpage( - 'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, display_id, 'Downloading player page') + 'http://www.tagesschau.de/multimedia/video/video%s~player_autoplay-true.html' % video_id, + display_id, 'Downloading player page') - medias = re.findall(r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', playerpage) + medias = re.findall( + r'"(http://media.+?)", type:"video/(.+?)", quality:"(.+?)"', + playerpage) formats = [] for url, ext, res in medias: - - if res == 's': - res = 'small' - quality = 0 - elif res == 'm': - res = 'medium' - quality = 1 - elif res == 'l': - res = 'large' - quality = 2 - else: - quality = 0 - - formats.append({ - 'format_id': res+'_'+ext, + f = { + 'format_id': res + '_' + ext, 'url': url, - 'quality': quality, 'ext': ext, - }) + } + f.update(self._FORMATS.get(res, {})) + formats.append(f) self._sort_formats(formats) @@ -76,7 +73,7 @@ class TagesschauIE(InfoExtractor): return { 'id': display_id, 'title': self._og_search_title(webpage).strip(), - 'thumbnail': 'http://www.tagesschau.de'+thumbnail, + 'thumbnail': 'http://www.tagesschau.de' + thumbnail, 'formats': formats, 'description': self._og_search_description(webpage).strip(), } From f0a6c3d2bc3a7d84c9a25d8ed96e6549f3ebdacb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 7 Jun 2014 20:32:23 +0700 Subject: [PATCH 064/340] [teachertube] Add support for audios --- youtube_dl/extractor/teachertube.py | 34 ++++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 4740f3d56..6d52763f9 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -4,13 +4,17 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + qualities, + determine_ext, +) class TeacherTubeIE(InfoExtractor): IE_NAME = 'teachertube' IE_DESC = 'teachertube.com videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/viewVideo\.php\?video_id=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', @@ -32,6 +36,15 @@ class TeacherTubeIE(InfoExtractor): 'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b', 'thumbnail': 're:http://.*\.jpg', }, + }, { + 'url': 'http://www.teachertube.com/music.php?music_id=8805', + 'md5': '01e8352006c65757caf7b961f6050e21', + 'info_dict': { + 'id': '8805', + 'ext': 'mp3', + 'title': 'PER ASPERA AD ASTRA', + 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE PER ASPERA AD ASTRA', + }, }] def _real_extract(self, url): @@ -40,19 +53,14 @@ class TeacherTubeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - url = self._html_search_meta('twitter:player:stream', webpage, 'twitter player') + quality = qualities(['mp3', 'flv', 'mp4']) - formats = [{ - 'format_id': 'flv', - 'url': url.replace('mp4v', 'flv').replace('.mp4', '.flv'), - 'quality': 0, - 'ext': 'flv', - }, { - 'format_id': 'mp4', - 'url': url, - 'quality': 1, - 'ext': 'mp4', - }] + formats = [ + { + 'url': media_url, + 'quality': quality(determine_ext(media_url)) + } for media_url in set(zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))[1]) + ] self._sort_formats(formats) From d5519808235997db2189e840bf87c89693a208cc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 15:33:45 +0200 Subject: [PATCH 065/340] [spiegeltv] Simplify and PEP8 --- youtube_dl/YoutubeDL.py | 9 ++++++ youtube_dl/extractor/common.py | 8 +++-- youtube_dl/extractor/spiegeltv.py | 49 +++++++++++++++++++------------ 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f3666573a..455c0a7b0 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -717,6 +717,15 @@ class YoutubeDL(object): info_dict['playlist'] = None info_dict['playlist_index'] = None + thumbnails = info_dict.get('thumbnails') + if thumbnails: + for t in thumbnails: + if 'width' in t and 'height' in t: + t['resolution'] = '%dx%d' % (t['width'], t['height']) + + if thumbnails and 'thumbnail' not in info_dict: + info_dict['thumbnail'] = thumbnails[-1]['url'] + if 'display_id' not in info_dict and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index db472aace..49e75405e 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -92,8 +92,12 @@ class InfoExtractor(object): unique, but available before title. Typically, id is something like "4234987", title "Dancing naked mole rats", and display_id "dancing-naked-mole-rats" - thumbnails: A list of dictionaries (with the entries "resolution" and - "url") for the varying thumbnails + thumbnails: A list of dictionaries, with the following entries: + * "url" + * "width" (optional, int) + * "height" (optional, int) + * "resolution" (optional, string "{width}x{height"}, + deprecated) thumbnail: Full URL to a video thumbnail image. description: One-line video description. uploader: Full name of the video uploader. diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py index ffd554633..7f388aced 100644 --- a/youtube_dl/extractor/spiegeltv.py +++ b/youtube_dl/extractor/spiegeltv.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor + class SpiegeltvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/filme/(?P<id>[\-a-z0-9]+)' _TEST = { @@ -13,6 +14,7 @@ class SpiegeltvIE(InfoExtractor): 'ext': 'm4v', 'title': 'Flug MH370', 'description': 'Das Rätsel um die Boeing 777 der Malaysia-Airlines', + 'thumbnail': 're:http://.*\.jpg$', }, 'params': { # rtmp download @@ -27,36 +29,48 @@ class SpiegeltvIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<h1.*?>(.*?)</h1>', webpage, 'title') - apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com'; + apihost = 'http://spiegeltv-ivms2-restapi.s3.amazonaws.com' + version_json = self._download_json( + '%s/version.json' % apihost, video_id, + note='Downloading version information') + version_name = version_json['version_name'] - version_json = self._download_json('%s/version.json' % apihost, None) - version_name = version_json['version_name'] + slug_json = self._download_json( + '%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id), + video_id, + note='Downloading object information') + oid = slug_json['object_id'] - slug_json = self._download_json('%s/%s/restapi/slugs/%s.json' % (apihost, version_name, video_id), None) - oid = slug_json['object_id'] - - media_json = self._download_json('%s/%s/restapi/media/%s.json' % (apihost, version_name, oid), None) - uuid = media_json['uuid'] - is_wide = media_json['is_wide'] + media_json = self._download_json( + '%s/%s/restapi/media/%s.json' % (apihost, version_name, oid), + video_id, note='Downloading media information') + uuid = media_json['uuid'] + is_wide = media_json['is_wide'] - server_json = self._download_json('http://www.spiegel.tv/streaming_servers/', None) - server = server_json[0]['endpoint'] + server_json = self._download_json( + 'http://www.spiegel.tv/streaming_servers/', video_id, + note='Downloading server information') + server = server_json[0]['endpoint'] thumbnails = [] for image in media_json['images']: - thumbnails.append({'url': image['url'], 'resolution': str(image['width']) + 'x' + str(image['height']) }) + thumbnails.append({ + 'url': image['url'], + 'width': image['width'], + 'height': image['height'], + }) description = media_json['subtitle'] - duration = int(round(media_json['duration_in_ms'] / 1000)) + duration = media_json['duration_in_ms'] / 1000. if is_wide: - format = '16x9' + format = '16x9' else: - format = '4x3' + format = '4x3' url = server + 'mp4:' + uuid + '_spiegeltv_0500_' + format + '.m4v' - return_dict = { + return { 'id': video_id, 'title': title, 'url': url, @@ -64,5 +78,4 @@ class SpiegeltvIE(InfoExtractor): 'description': description, 'duration': duration, 'thumbnails': thumbnails - } - return return_dict + } \ No newline at end of file From be6d722904f646e1c7f879cd32bfced22abffada Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 15:39:21 +0200 Subject: [PATCH 066/340] [cnn] Improve thumbnail extraction --- youtube_dl/YoutubeDL.py | 2 ++ youtube_dl/extractor/cnn.py | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 455c0a7b0..dc0ba986a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -719,6 +719,8 @@ class YoutubeDL(object): thumbnails = info_dict.get('thumbnails') if thumbnails: + thumbnails.sort(key=lambda t: ( + t.get('width'), t.get('height'), t.get('url'))) for t in thumbnails: if 'width' in t and 'height' in t: t['resolution'] = '%dx%d' % (t['width'], t['height']) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py index b32cb8980..dae40c136 100644 --- a/youtube_dl/extractor/cnn.py +++ b/youtube_dl/extractor/cnn.py @@ -79,8 +79,11 @@ class CNNIE(InfoExtractor): self._sort_formats(formats) - thumbnails = sorted([((int(t.attrib['height']),int(t.attrib['width'])), t.text) for t in info.findall('images/image')]) - thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails] + thumbnails = [{ + 'height': int(t.attrib['height']), + 'width': int(t.attrib['width']), + 'url': t.text, + } for t in info.findall('images/image')] metas_el = info.find('metas') upload_date = ( @@ -93,8 +96,7 @@ class CNNIE(InfoExtractor): 'id': info.attrib['id'], 'title': info.find('headline').text, 'formats': formats, - 'thumbnail': thumbnails[-1][1], - 'thumbnails': thumbs_dict, + 'thumbnails': thumbnails, 'description': info.find('description').text, 'duration': duration, 'upload_date': upload_date, From d30d28156d2379c41ae08dcc5b50d292a0ca3d51 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 15:40:27 +0200 Subject: [PATCH 067/340] Credit @georgjaehnig for spiegeltv --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cbb053e13..e2a4c04da 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -57,6 +57,7 @@ __authors__ = ( 'Jason Normore', 'Hoje Lee', 'Adam Thalhammer', + 'Georg Jähnig', ) __license__ = 'Public Domain' From 05741e05d9f7281d8caf6072685ac84cd174d242 Mon Sep 17 00:00:00 2001 From: codelol <jackrobin@gmail.com> Date: Fri, 30 May 2014 21:15:59 -0700 Subject: [PATCH 068/340] [ku6] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ku6.py | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 youtube_dl/extractor/ku6.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 72523c54d..ace298fb3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -142,6 +142,7 @@ from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .keek import KeekIE from .kontrtube import KontrTubeIE +from .ku6 import Ku6IE from .la7 import LA7IE from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE diff --git a/youtube_dl/extractor/ku6.py b/youtube_dl/extractor/ku6.py new file mode 100644 index 000000000..eecbe2696 --- /dev/null +++ b/youtube_dl/extractor/ku6.py @@ -0,0 +1,38 @@ +from __future__ import unicode_literals +import re +from .common import InfoExtractor + +class Ku6IE(InfoExtractor): + _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' + _TEST = { + 'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html', + 'info_dict': { + 'id': 'JG-8yS14xzBr4bCn1pu0xw', + 'ext': 'f4v', + u'title': u'techniques test', + } + } + + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + #title = self._html_search_meta('title', webpage, 'title') + title = self._search_regex(r'<h1 title=.*>(.*?)</h1>', webpage, 'title') + self.to_screen('title: '+title) + + dataUrl = 'http://v.ku6.com/fetchVideo4Player/'+video_id+'.html' + jsonData = self._download_json(dataUrl, video_id) + downloadUrl = jsonData['data']['f'] + + return { + 'id': video_id, + 'title': title, + 'url': downloadUrl + # TODO more properties (see youtube_dl/extractor/common.py) + + } + From 90e0fd4badab6884703b7c759fc5cecd9f855e42 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 15:46:33 +0200 Subject: [PATCH 069/340] [ku6] Improve (#3015) --- youtube_dl/extractor/ku6.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ku6.py b/youtube_dl/extractor/ku6.py index eecbe2696..484239b19 100644 --- a/youtube_dl/extractor/ku6.py +++ b/youtube_dl/extractor/ku6.py @@ -1,30 +1,29 @@ from __future__ import unicode_literals + import re + from .common import InfoExtractor + class Ku6IE(InfoExtractor): _VALID_URL = r'http://v\.ku6\.com/show/(?P<id>[a-zA-Z0-9\-\_]+)(?:\.)*html' _TEST = { 'url': 'http://v.ku6.com/show/JG-8yS14xzBr4bCn1pu0xw...html', + 'md5': '01203549b9efbb45f4b87d55bdea1ed1', 'info_dict': { 'id': 'JG-8yS14xzBr4bCn1pu0xw', 'ext': 'f4v', - u'title': u'techniques test', + 'title': 'techniques test', } } - - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - #title = self._html_search_meta('title', webpage, 'title') title = self._search_regex(r'<h1 title=.*>(.*?)</h1>', webpage, 'title') - self.to_screen('title: '+title) - - dataUrl = 'http://v.ku6.com/fetchVideo4Player/'+video_id+'.html' + dataUrl = 'http://v.ku6.com/fetchVideo4Player/%s.html' % video_id jsonData = self._download_json(dataUrl, video_id) downloadUrl = jsonData['data']['f'] @@ -32,7 +31,5 @@ class Ku6IE(InfoExtractor): 'id': video_id, 'title': title, 'url': downloadUrl - # TODO more properties (see youtube_dl/extractor/common.py) - } From eb92077720e9a80fec0b3d5d23d908f2f2711b91 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 15:51:01 +0200 Subject: [PATCH 070/340] [soundcloud] Add duration information (Closes #3035, Fixes #3034) --- youtube_dl/extractor/soundcloud.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index d6f453fb9..25515f068 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -12,6 +12,7 @@ from ..utils import ( compat_urllib_parse, ExtractorError, + int_or_none, unified_strdate, ) @@ -44,7 +45,8 @@ class SoundcloudIE(InfoExtractor): "upload_date": "20121011", "description": "No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o'd", "uploader": "E.T. ExTerrestrial Music", - "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1" + "title": "Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1", + "duration": 143, } }, # not streamable song @@ -57,6 +59,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', 'uploader': 'The Royal Concept', 'upload_date': '20120521', + 'duration': 227, }, 'params': { # rtmp @@ -74,6 +77,7 @@ class SoundcloudIE(InfoExtractor): 'uploader': 'jaimeMF', 'description': 'test chars: \"\'/\\ä↭', 'upload_date': '20131209', + 'duration': 9, }, }, # downloadable song @@ -87,6 +91,7 @@ class SoundcloudIE(InfoExtractor): 'description': 'Vocals', 'uploader': 'Sim Gretina', 'upload_date': '20130815', + #'duration': 42, }, }, ] @@ -119,6 +124,7 @@ class SoundcloudIE(InfoExtractor): 'title': info['title'], 'description': info['description'], 'thumbnail': thumbnail, + 'duration': int_or_none(info.get('duration'), 1000), } formats = [] if info.get('downloadable', False): From 8ae5ce17264fde131acd11a454cafe9a32a34bac Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 15:52:49 +0200 Subject: [PATCH 071/340] [cmt] Simplify (mentioned in #2072) --- youtube_dl/extractor/cmt.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index 88e0e9aba..e96c59f71 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -1,19 +1,19 @@ +from __future__ import unicode_literals from .mtv import MTVIE + class CMTIE(MTVIE): - IE_NAME = u'cmt.com' + IE_NAME = 'cmt.com' _VALID_URL = r'https?://www\.cmt\.com/videos/.+?/(?P<videoid>[^/]+)\.jhtml' _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' - _TESTS = [ - { - u'url': u'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', - u'md5': u'e6b7ef3c4c45bbfae88061799bbba6c2', - u'info_dict': { - u'id': u'989124', - u'ext': u'mp4', - u'title': u'Garth Brooks - "The Call (featuring Trisha Yearwood)"', - u'description': u'Blame It All On My Roots', - }, + _TESTS = [{ + 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', + 'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2', + 'info_dict': { + 'id': '989124', + 'ext': 'mp4', + 'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"', + 'description': 'Blame It All On My Roots', }, - ] + }] From f2741c8d3ac45edac72dd5c3cad60b8ffcba8cf6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 16:39:08 +0200 Subject: [PATCH 072/340] [vh1] Simplify --- youtube_dl/extractor/__init__.py | 7 +- youtube_dl/extractor/vh1.py | 207 +++++++++++++++---------------- 2 files changed, 104 insertions(+), 110 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 01c21189b..15a42ce44 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -306,12 +306,7 @@ from .veehd import VeeHDIE from .veoh import VeohIE from .vesti import VestiIE from .vevo import VevoIE -from .vh1 import ( - VH1EpisodeIE, - VH1ClipIE, - VH1ShortUrlIE, - VH1MusicVideoIE -) +from .vh1 import VH1IE from .viddler import ViddlerIE from .videobam import VideoBamIE from .videodetective import VideoDetectiveIE diff --git a/youtube_dl/extractor/vh1.py b/youtube_dl/extractor/vh1.py index 0e30d7bde..447c6a0bb 100644 --- a/youtube_dl/extractor/vh1.py +++ b/youtube_dl/extractor/vh1.py @@ -1,121 +1,120 @@ +from __future__ import unicode_literals + from .mtv import MTVIE + import re from ..utils import fix_xml_ampersands + class VH1IE(MTVIE): - IE_NAME = u'vh1.com' + IE_NAME = 'vh1.com' _FEED_URL = 'http://www.vh1.com/player/embed/AS3/fullepisode/rss/' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') - idoc = self._download_xml( - self._FEED_URL + '?id=' + video_id, video_id, - 'Downloading info', transform_source=fix_xml_ampersands) - return [self._get_video_info(item) for item in idoc.findall('.//item')] - - -class VH1EpisodeIE(VH1IE): - _VALID_URL = r'https?://www\.vh1\.com/video/.+?/full-episodes/.+?/(?P<videoid>[^/]+)/playlist\.jhtml' - _TESTS = [ - { - u'url': u'http://www.vh1.com/video/metal-evolution/full-episodes/progressive-metal/1678612/playlist.jhtml', - u'playlist': [ - { - u'info_dict': { - u'id': u'731565', - u'ext': u'mp4', - u'title': u'Metal Evolution: Ep. 11 Act 1', - u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 12 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' - } - }, - { - u'info_dict': { - u'id': u'731567', - u'ext': u'mp4', - u'title': u'Metal Evolution: Ep. 11 Act 2', - u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' - } - }, - - { - u'info_dict': { - u'id': u'731568', - u'ext': u'mp4', - u'title': u'Metal Evolution: Ep. 11 Act 3', - u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' - } - }, - { - u'info_dict': { - u'id': u'731569', - u'ext': u'mp4', - u'title': u'Metal Evolution: Ep. 11 Act 4', - u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' - } - }, - { - u'info_dict': { - u'id': u'731570', - u'ext': u'mp4', - u'title': u'Metal Evolution: Ep. 11 Act 5', - u'description': u'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' - } + _TESTS = [{ + 'url': 'http://www.vh1.com/video/metal-evolution/full-episodes/progressive-metal/1678612/playlist.jhtml', + 'playlist': [ + { + 'md5': '7827a7505f59633983165bbd2c119b52', + 'info_dict': { + 'id': '731565', + 'ext': 'mp4', + 'title': 'Metal Evolution: Ep. 11 Act 1', + 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 12 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + }, + { + 'md5': '34fb4b7321c546b54deda2102a61821f', + 'info_dict': { + 'id': '731567', + 'ext': 'mp4', + 'title': 'Metal Evolution: Ep. 11 Act 2', + 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + }, + { + 'md5': '813f38dba4c1b8647196135ebbf7e048', + 'info_dict': { + 'id': '731568', + 'ext': 'mp4', + 'title': 'Metal Evolution: Ep. 11 Act 3', + 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + }, + { + 'md5': '51adb72439dfaed11c799115d76e497f', + 'info_dict': { + 'id': '731569', + 'ext': 'mp4', + 'title': 'Metal Evolution: Ep. 11 Act 4', + 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' + } + }, + { + 'md5': '93d554aaf79320703b73a95288c76a6e', + 'info_dict': { + 'id': '731570', + 'ext': 'mp4', + 'title': 'Metal Evolution: Ep. 11 Act 5', + 'description': 'Many rock academics have proclaimed that the truly progressive musicianship of the last 20 years has been found right here in the world of heavy metal, rather than obvious locales such as jazz, fusion or progressive rock. It stands to reason then, that much of this jaw-dropping virtuosity occurs within what\'s known as progressive metal, a genre that takes root with the likes of Rush in the \'70s, Queensryche and Fates Warning in the \'80s, and Dream Theater in the \'90s. Since then, the genre has exploded with creativity, spawning mind-bending, genre-defying acts such as Tool, Mastodon, Coheed And Cambria, Porcupine Tree, Meshuggah, A Perfect Circle and Opeth. Episode 11 looks at the extreme musicianship of these bands, as well as their often extreme literary prowess and conceptual strength, the end result being a rich level of respect and attention such challenging acts have brought upon the world of heavy metal, from a critical community usually dismissive of the form.' } - ] - } - ] - - -class VH1ClipIE(VH1IE): - _VALID_URL = r'https?://www\.vh1\.com/video/misc/.+?/.+?\.jhtml#id=(?P<videoid>[^/]+)$' - _TESTS = [ - { - u'url': u'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118', - u'info_dict': { - u'id': u'706675', - u'ext': u'mp4', - u'title': u'Metal Evolution: Episode 1 Pre-Metal Show Clip', - u'description': u'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.' } + ], + '_skip': 'Blocked outside the US', + }, { + # Clip + 'url': 'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118', + 'md5': '7d67cf6d9cdc6b4f3d3ac97a55403844', + 'info_dict': { + 'id': '706675', + 'ext': 'mp4', + 'title': 'Metal Evolution: Episode 1 Pre-Metal Show Clip', + 'description': 'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.' } - ] - - -class VH1ShortUrlIE(VH1IE): - _VALID_URL = r'https?://www\.vh1\.com/video/play.jhtml\?id=(?P<videoid>[^/]+)$' - _TESTS = [ - { - u'url': u'http://www.vh1.com/video/play.jhtml?id=1678353', - u'info_dict': { - u'id': u'730355', - u'ext': u'mp4', - u'title': u'Metal Evolution: Episode 11 Progressive Metal Sneak', - u'description': u'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.' - } + }, { + # Short link + 'url': 'http://www.vh1.com/video/play.jhtml?id=1678353', + 'md5': '853192b87ad978732b67dd8e549b266a', + 'info_dict': { + 'id': '730355', + 'ext': 'mp4', + 'title': 'Metal Evolution: Episode 11 Progressive Metal Sneak', + 'description': 'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.' } - ] - - -class VH1MusicVideoIE(VH1IE): - _VALID_URL = r'https?://www\.vh1\.com/video/.+?/(?P<videoid>[^/]+)/.+?$' - _TESTS = [ - { - u'url': u'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml', - u'info_dict': { - u'id': u'900535', - u'ext': u'mp4', - u'title': u'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"', - u'description': u'The Heist' - } + }, { + 'url': 'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml', + 'info_dict': { + 'id': '900535', + 'ext': 'mp4', + 'title': 'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"', + 'description': 'The Heist' } - ] + }] + + _VALID_URL = r'''(?x) + https?://www\.vh1\.com/video/ + (?: + .+?/full-episodes/.+?/(?P<playlist_id>[^/]+)/playlist\.jhtml + | + (?: + play.jhtml\?id=| + misc/.+?/.+?\.jhtml\#id= + ) + (?P<video_id>[0-9]+)$ + | + [^/]+/(?P<music_id>[0-9]+)/[^/]+? + ) + ''' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('videoid') - # difference from VH1IE._real_extract() is "vid" param instead of "id" + if mobj.group('music_id'): + id_field = 'vid' + video_id = mobj.group('music_id') + else: + video_id = mobj.group('playlist_id') or mobj.group('video_id') + id_field = 'id' + doc_url = '%s?%s=%s' % (self._FEED_URL, id_field, video_id) + idoc = self._download_xml( - self._FEED_URL + '?vid=' + video_id, video_id, + doc_url, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return [self._get_video_info(item) for item in idoc.findall('.//item')] From 1c0ade7afab6cb11404ddb19405a31f38aad54bf Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 16:40:16 +0200 Subject: [PATCH 073/340] [vh1] Skip tests (Do not work from Germany) --- youtube_dl/extractor/vh1.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vh1.py b/youtube_dl/extractor/vh1.py index 447c6a0bb..2f77e3898 100644 --- a/youtube_dl/extractor/vh1.py +++ b/youtube_dl/extractor/vh1.py @@ -58,7 +58,7 @@ class VH1IE(MTVIE): } } ], - '_skip': 'Blocked outside the US', + 'skip': 'Blocked outside the US', }, { # Clip 'url': 'http://www.vh1.com/video/misc/706675/metal-evolution-episode-1-pre-metal-show-clip.jhtml#id=1674118', @@ -68,7 +68,8 @@ class VH1IE(MTVIE): 'ext': 'mp4', 'title': 'Metal Evolution: Episode 1 Pre-Metal Show Clip', 'description': 'The greatest documentary ever made about Heavy Metal begins as our host Sam Dunn travels the globe to seek out the origins and influences that helped create Heavy Metal. Sam speaks to legends like Kirk Hammett, Alice Cooper, Slash, Bill Ward, Geezer Butler, Tom Morello, Ace Frehley, Lemmy Kilmister, Dave Davies, and many many more. This episode is the prologue for the 11 hour series, and Sam goes back to the very beginning to reveal how Heavy Metal was created.' - } + }, + 'skip': 'Blocked outside the US', }, { # Short link 'url': 'http://www.vh1.com/video/play.jhtml?id=1678353', @@ -78,15 +79,18 @@ class VH1IE(MTVIE): 'ext': 'mp4', 'title': 'Metal Evolution: Episode 11 Progressive Metal Sneak', 'description': 'In Metal Evolution\'s finale sneak, Sam sits with Michael Giles of King Crimson and gets feedback from Metallica guitarist Kirk Hammett on why the group was influential.' - } + }, + 'skip': 'Blocked outside the US', }, { 'url': 'http://www.vh1.com/video/macklemore-ryan-lewis/900535/cant-hold-us-ft-ray-dalton.jhtml', + 'md5': 'b1bcb5b4380c9d7f544065589432dee7', 'info_dict': { 'id': '900535', 'ext': 'mp4', 'title': 'Macklemore & Ryan Lewis - "Can\'t Hold Us ft. Ray Dalton"', 'description': 'The Heist' - } + }, + 'skip': 'Blocked outside the US', }] _VALID_URL = r'''(?x) From 9cc977f104410921063bc957789518b64d1ab46c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 16:41:44 +0200 Subject: [PATCH 074/340] Credit @ralfharing for vh1 --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e2a4c04da..1e01432d2 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -58,6 +58,7 @@ __authors__ = ( 'Hoje Lee', 'Adam Thalhammer', 'Georg Jähnig', + 'Ralf Haring', ) __license__ = 'Public Domain' From 059009c592d1851c157632657b2ca53e782bdeb4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 16:42:53 +0200 Subject: [PATCH 075/340] release 2014.06.07 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 47dde62b9..6fe7c7b25 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.04' +__version__ = '2014.06.07' From 94128d6b0d94551ea23daf59983557c777c1e251 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 16:50:19 +0200 Subject: [PATCH 076/340] [nrk] Fix test checksum --- youtube_dl/extractor/nrk.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 3a6a7883e..1f066cf05 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( ExtractorError, - int_or_none, + float_or_none, unified_strdate, ) @@ -89,7 +89,7 @@ class NRKTVIE(InfoExtractor): }, { 'url': 'http://tv.nrk.no/program/mdfp15000514', - 'md5': '383650ece2b25ecec996ad7b5bb2a384', + 'md5': 'af01795a31f1cf7265c8657534d8077b', 'info_dict': { 'id': 'mdfp15000514', 'ext': 'flv', @@ -111,9 +111,8 @@ class NRKTVIE(InfoExtractor): description = self._html_search_meta('description', page, 'description') thumbnail = self._html_search_regex(r'data-posterimage="([^"]+)"', page, 'thumbnail', fatal=False) upload_date = unified_strdate(self._html_search_meta('rightsfrom', page, 'upload date', fatal=False)) - duration = self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False) - if duration: - duration = float(duration) + duration = float_or_none( + self._html_search_regex(r'data-duration="([^"]+)"', page, 'duration', fatal=False)) formats = [] From 23ae281b31d5c4042f15ffb8e5ded0065a0dc808 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 16:52:11 +0200 Subject: [PATCH 077/340] [fc2] Fall back to webpage title if needed --- youtube_dl/extractor/fc2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/fc2.py b/youtube_dl/extractor/fc2.py index 18f91efac..c663a0f81 100644 --- a/youtube_dl/extractor/fc2.py +++ b/youtube_dl/extractor/fc2.py @@ -50,10 +50,13 @@ class FC2IE(InfoExtractor): raise ExtractorError('Error code: %s' % info['err_code'][0]) video_url = info['filepath'][0] + '?mid=' + info['mid'][0] + title_info = info.get('title') + if title_info: + title = title_info[0] return { 'id': video_id, - 'title': info['title'][0], + 'title': title, 'url': video_url, 'ext': 'flv', 'thumbnail': thumbnail, From 814d4257dfe75b591dee2120c57cb44ebb547dff Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sat, 7 Jun 2014 16:52:34 +0200 Subject: [PATCH 078/340] Remove unused imports --- youtube_dl/extractor/ntv.py | 1 - youtube_dl/extractor/slutload.py | 3 --- youtube_dl/extractor/soundcloud.py | 1 - 3 files changed, 5 deletions(-) diff --git a/youtube_dl/extractor/ntv.py b/youtube_dl/extractor/ntv.py index 733ed6c26..ed60314ec 100644 --- a/youtube_dl/extractor/ntv.py +++ b/youtube_dl/extractor/ntv.py @@ -5,7 +5,6 @@ import re from .common import InfoExtractor from ..utils import ( - ExtractorError, unescapeHTML ) diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py index ecc0abfda..e6e7d0865 100644 --- a/youtube_dl/extractor/slutload.py +++ b/youtube_dl/extractor/slutload.py @@ -3,9 +3,6 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - ExtractorError, -) class SlutloadIE(InfoExtractor): diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 25515f068..7aa100fb2 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -1,7 +1,6 @@ # encoding: utf-8 from __future__ import unicode_literals -import json import re import itertools From 617c0b2239f565d512a175d7fce4b5a6b3835b4c Mon Sep 17 00:00:00 2001 From: marcwebbie <marcwebbie@gmail.com> Date: Sat, 7 Jun 2014 23:09:45 -0300 Subject: [PATCH 079/340] [GorillaVid] Added GorillaVid extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/gorillavid.py | 38 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 youtube_dl/extractor/gorillavid.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 15a42ce44..6c9a7593a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -109,6 +109,7 @@ from .gdcvault import GDCVaultIE from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE +from .gorillavid import GorillaVidIE from .hark import HarkIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py new file mode 100644 index 000000000..bdf6e30c2 --- /dev/null +++ b/youtube_dl/extractor/gorillavid.py @@ -0,0 +1,38 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class GorillaVidIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?gorillavid.in/(?:embed-)?(?P<id>\w+)(?:\-\d+x\d+)?.html' + _TEST = { + 'url': "http://gorillavid.in/kdk7i5r1p5ye.html", + 'md5': '5a01b05ed3da82a10c6659e954b80108', + 'info_dict': { + 'id': 'kdk7i5r1p5ye', + 'ext': 'flv', + 'title': 'Full House 1x16 - But Seriously, Folks.avi', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r"name=['\"]fname['\"]\s+value=['\"](.*?)['\"]", webpage, u"video title") + + # download embed page again with cookies to get url + embed_url = "http://gorillavid.in/embed-{0}-960x480.html".format(video_id) + webpage = self._download_webpage(embed_url, video_id, note=u'Downloading webpage again (with cookie)') + url = self._html_search_regex(r'file:\s+["\'](http://.*?video.\w{3})["\']', webpage, url) + + info_dict = { + 'id': video_id, + 'title': title, + 'url': url, + } + + return info_dict From 77abae55df363fc005ad664a7fd1946e8866b43f Mon Sep 17 00:00:00 2001 From: marcwebbie <marcwebbie@gmail.com> Date: Sun, 8 Jun 2014 03:13:45 -0300 Subject: [PATCH 080/340] Changed video url to a public video --- youtube_dl/extractor/gorillavid.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index bdf6e30c2..7e8b9f706 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -6,14 +6,14 @@ import re from .common import InfoExtractor class GorillaVidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?gorillavid.in/(?:embed-)?(?P<id>\w+)(?:\-\d+x\d+)?.html' + _VALID_URL = r'https?://(?:www.)?gorillavid.in/(?:embed-)?(?P<id>\w+)(?:\-\d+x\d+)?(?:.html)?' _TEST = { - 'url': "http://gorillavid.in/kdk7i5r1p5ye.html", - 'md5': '5a01b05ed3da82a10c6659e954b80108', + 'url': "http://gorillavid.in/z08zf8le23c6", + 'md5': 'c9e293ca74d46cad638e199c3f3fe604', 'info_dict': { - 'id': 'kdk7i5r1p5ye', - 'ext': 'flv', - 'title': 'Full House 1x16 - But Seriously, Folks.avi', + 'id': 'z08zf8le23c6', + 'ext': 'mp4', + 'title': 'Say something nice', } } From 702e5220444a970cd4d05ec95deb478391e365cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Jun 2014 22:16:48 +0700 Subject: [PATCH 081/340] [teachertube] Fix extraction for Python 3 --- youtube_dl/extractor/teachertube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 6d52763f9..4d9666c6b 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -55,11 +55,13 @@ class TeacherTubeIE(InfoExtractor): quality = qualities(['mp3', 'flv', 'mp4']) + _, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage)) + formats = [ { 'url': media_url, 'quality': quality(determine_ext(media_url)) - } for media_url in set(zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage))[1]) + } for media_url in set(media_urls) ] self._sort_formats(formats) From 15e423407f4bd6a3035335ec8fb63dc8ef88db52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Jun 2014 22:41:24 +0700 Subject: [PATCH 082/340] [dreisat] Fix thumbnails' width and height --- youtube_dl/extractor/dreisat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 0b11d1f10..011264eca 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -32,8 +32,8 @@ class DreiSatIE(InfoExtractor): thumbnail_els = details_doc.findall('.//teaserimage') thumbnails = [{ - 'width': te.attrib['key'].partition('x')[0], - 'height': te.attrib['key'].partition('x')[2], + 'width': int(te.attrib['key'].partition('x')[0]), + 'height': int(te.attrib['key'].partition('x')[2]), 'url': te.text, } for te in thumbnail_els] From e0b4cc489fea7346a3149f003b4fab7ba3bc5916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Jun 2014 22:45:12 +0700 Subject: [PATCH 083/340] [dreisat] Modernize --- youtube_dl/extractor/dreisat.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py index 011264eca..69ca75423 100644 --- a/youtube_dl/extractor/dreisat.py +++ b/youtube_dl/extractor/dreisat.py @@ -1,34 +1,32 @@ -# coding: utf-8 +from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import ( - unified_strdate, -) +from ..utils import unified_strdate class DreiSatIE(InfoExtractor): IE_NAME = '3sat' _VALID_URL = r'(?:http://)?(?:www\.)?3sat\.de/mediathek/(?:index\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)$' _TEST = { - u"url": u"http://www.3sat.de/mediathek/index.php?obj=36983", - u'file': u'36983.mp4', - u'md5': u'9dcfe344732808dbfcc901537973c922', - u'info_dict': { - u"title": u"Kaffeeland Schweiz", - u"description": u"Über 80 Kaffeeröstereien liefern in der Schweiz das Getränk, in das das Land so vernarrt ist: Mehr als 1000 Tassen trinkt ein Schweizer pro Jahr. SCHWEIZWEIT nimmt die Kaffeekultur unter die...", - u"uploader": u"3sat", - u"upload_date": u"20130622" + 'url': 'http://www.3sat.de/mediathek/index.php?obj=36983', + 'md5': '9dcfe344732808dbfcc901537973c922', + 'info_dict': { + 'id': '36983', + 'ext': 'mp4', + 'title': 'Kaffeeland Schweiz', + 'description': 'md5:cc4424b18b75ae9948b13929a0814033', + 'uploader': '3sat', + 'upload_date': '20130622' } } - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id - details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details') + details_doc = self._download_xml(details_url, video_id, 'Downloading video details') thumbnail_els = details_doc.findall('.//teaserimage') thumbnails = [{ From 09ffa08ba1d65cb2ae6912053dd62ada9fd4ef24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 8 Jun 2014 23:05:20 +0700 Subject: [PATCH 084/340] [veoh] Capture error message --- youtube_dl/extractor/veoh.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index d16993daf..fb132aef6 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_request, int_or_none, + ExtractorError, ) @@ -94,8 +95,12 @@ class VeohIE(InfoExtractor): if video_id.startswith('v'): rsp = self._download_xml( r'http://www.veoh.com/api/findByPermalink?permalink=%s' % video_id, video_id, 'Downloading video XML') - if rsp.get('stat') == 'ok': + stat = rsp.get('stat') + if stat == 'ok': return self._extract_video(rsp.find('./videoList/video')) + elif stat == 'fail': + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, rsp.find('./errorList/error').get('errorMessage')), expected=True) webpage = self._download_webpage(url, video_id) age_limit = 0 From 3048e82a94bc29a5bba56688dbf824380442405a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Jun 2014 20:37:04 +0700 Subject: [PATCH 085/340] [nuvid] Improve extraction --- youtube_dl/extractor/nuvid.py | 53 ++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py index e3db9fe8c..fa2ec19cf 100644 --- a/youtube_dl/extractor/nuvid.py +++ b/youtube_dl/extractor/nuvid.py @@ -3,6 +3,11 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + parse_duration, + unified_strdate, + compat_urllib_request, +) class NuvidIE(InfoExtractor): @@ -13,8 +18,10 @@ class NuvidIE(InfoExtractor): 'info_dict': { 'id': '1310741', 'ext': 'mp4', - "title": "Horny babes show their awesome bodeis and", - "age_limit": 18, + 'title': 'Horny babes show their awesome bodeis and', + 'duration': 129, + 'upload_date': '20140508', + 'age_limit': 18, } } @@ -22,27 +29,41 @@ class NuvidIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - murl = url.replace('://www.', '://m.') - webpage = self._download_webpage(murl, video_id) + formats = [] + for dwnld_speed, format_id in [(0, '3gp'), (5, 'mp4')]: + request = compat_urllib_request.Request( + 'http://m.nuvid.com/play/%s' % video_id) + request.add_header('Cookie', 'skip_download_page=1; dwnld_speed=%d; adv_show=1' % dwnld_speed) + webpage = self._download_webpage( + request, video_id, 'Downloading %s page' % format_id) + video_url = self._html_search_regex( + r'<a href="([^"]+)"\s*>Continue to watch video', webpage, '%s video URL' % format_id, fatal=False) + if not video_url: + continue + formats.append({ + 'url': video_url, + 'format_id': format_id, + }) + + webpage = self._download_webpage( + 'http://m.nuvid.com/video/%s' % video_id, video_id, 'Downloading video page') title = self._html_search_regex( - r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', - webpage, 'title').strip() - - url_end = self._html_search_regex( - r'href="(/[^"]+)"[^>]*data-link_type="mp4"', - webpage, 'video_url') - video_url = 'http://m.nuvid.com' + url_end - + r'<div class="title">\s+<h2[^>]*>([^<]+)</h2>', webpage, 'title').strip() thumbnail = self._html_search_regex( r'href="(/thumbs/[^"]+)"[^>]*data-link_type="thumbs"', webpage, 'thumbnail URL', fatal=False) + duration = parse_duration(self._html_search_regex( + r'Length:\s*<span>(\d{2}:\d{2})</span>',webpage, 'duration', fatal=False)) + upload_date = unified_strdate(self._html_search_regex( + r'Added:\s*<span>(\d{4}-\d{2}-\d{2})</span>', webpage, 'upload date', fatal=False)) return { 'id': video_id, - 'url': video_url, - 'ext': 'mp4', 'title': title, - 'thumbnail': thumbnail, + 'thumbnail': 'http://m.nuvid.com/%s' % thumbnail, + 'duration': duration, + 'upload_date': upload_date, 'age_limit': 18, - } + 'formats': formats, + } \ No newline at end of file From 828553b614e091c181a519db9c451c133011016b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 9 Jun 2014 20:41:33 +0700 Subject: [PATCH 086/340] [nuvid] Remove superfluous slash --- youtube_dl/extractor/nuvid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dl/extractor/nuvid.py index fa2ec19cf..280328b78 100644 --- a/youtube_dl/extractor/nuvid.py +++ b/youtube_dl/extractor/nuvid.py @@ -61,7 +61,7 @@ class NuvidIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': 'http://m.nuvid.com/%s' % thumbnail, + 'thumbnail': 'http://m.nuvid.com%s' % thumbnail, 'duration': duration, 'upload_date': upload_date, 'age_limit': 18, From 23566e0d783367dc92c0bfc7f907ef63e1ade658 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 9 Jun 2014 20:23:20 +0200 Subject: [PATCH 087/340] rtmp and hls downloaders: Clarify error message when the external tools are not installed Ask to install them, as we do in the postprocessor. We get some reports with it, like #3061 or #3048. --- youtube_dl/downloader/hls.py | 2 +- youtube_dl/downloader/rtmp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 9d407fe6e..9f29e2f81 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -25,7 +25,7 @@ class HlsFD(FileDownloader): except (OSError, IOError): pass else: - self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found') + self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found. Please install one.') cmd = [program] + args retval = subprocess.call(cmd) diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index cc6a84106..68646709a 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -106,7 +106,7 @@ class RtmpFD(FileDownloader): try: subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) except (OSError, IOError): - self.report_error('RTMP download detected but "rtmpdump" could not be run') + self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.') return False # Download using rtmpdump. rtmpdump returns exit code 2 when From 2b88feedf7993c24b03e0a7ff169a548794de70c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 9 Jun 2014 22:06:45 +0200 Subject: [PATCH 088/340] [generic] Add support for <embed YouTube --- youtube_dl/extractor/generic.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 38a357d3b..34d55297c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -260,7 +260,24 @@ class GenericIE(InfoExtractor): 'uploader': 'Spi0n', }, 'add_ie': ['Dailymotion'], - } + }, + # YouTube embed + { + 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html', + 'info_dict': { + 'id': 'FXRb4ykk4S0', + 'ext': 'mp4', + 'title': 'The NBL Auction 2014', + 'uploader': 'BADMINTON England', + 'uploader_id': 'BADMINTONEvents', + 'upload_date': '20140603', + 'description': 'md5:9ef128a69f1e262a700ed83edb163a73', + }, + 'add_ie': ['Youtube'], + 'params': { + 'skip_download': True, + } + }, ] def report_download_webpage(self, video_id): @@ -478,8 +495,13 @@ class GenericIE(InfoExtractor): # Look for embedded YouTube player matches = re.findall(r'''(?x) - (?:<iframe[^>]+?src=|embedSWF\(\s*) - (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ + (?: + <iframe[^>]+?src=| + <embed[^>]+?src=| + embedSWF\(?:\s* + ) + (["\']) + (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ (?:embed|v)/.+?) \1''', webpage) if matches: From 2656f4eb6a125aea5d5549febf4ed9087ff543b2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 9 Jun 2014 22:30:57 +0200 Subject: [PATCH 089/340] [hypem] Modernize --- youtube_dl/extractor/hypem.py | 68 +++++++++++++++++------------------ 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index 9bd06e7c7..6a95da900 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import re import time @@ -13,59 +15,55 @@ from ..utils import ( class HypemIE(InfoExtractor): - """Information Extractor for hypem""" - _VALID_URL = r'(?:http://)?(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' + _VALID_URL = r'http://(?:www\.)?hypem\.com/track/([^/]+)/([^/]+)' _TEST = { - u'url': u'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', - u'file': u'1v6ga.mp3', - u'md5': u'b9cc91b5af8995e9f0c1cee04c575828', - u'info_dict': { - u"title": u"Tame" + 'url': 'http://hypem.com/track/1v6ga/BODYWORK+-+TAME', + 'md5': 'b9cc91b5af8995e9f0c1cee04c575828', + 'info_dict': { + 'id': '1v6ga', + 'ext': 'mp3', + 'title': 'Tame', + 'uploader': 'BODYWORK', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) track_id = mobj.group(1) data = {'ax': 1, 'ts': time.time()} data_encoded = compat_urllib_parse.urlencode(data) complete_url = url + "?" + data_encoded request = compat_urllib_request.Request(complete_url) - response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage with the url') + response, urlh = self._download_webpage_handle( + request, track_id, 'Downloading webpage with the url') cookie = urlh.headers.get('Set-Cookie', '') - self.report_extraction(track_id) - - html_tracks = self._html_search_regex(r'<script type="application/json" id="displayList-data">(.*?)</script>', - response, u'tracks', flags=re.MULTILINE|re.DOTALL).strip() + html_tracks = self._html_search_regex( + r'(?ms)<script type="application/json" id="displayList-data">\s*(.*?)\s*</script>', + response, 'tracks') try: track_list = json.loads(html_tracks) - track = track_list[u'tracks'][0] + track = track_list['tracks'][0] except ValueError: - raise ExtractorError(u'Hypemachine contained invalid JSON.') + raise ExtractorError('Hypemachine contained invalid JSON.') - key = track[u"key"] - track_id = track[u"id"] - artist = track[u"artist"] - title = track[u"song"] + key = track['key'] + track_id = track['id'] + artist = track['artist'] + title = track['song'] - serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key)) - request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'}) + serve_url = "http://hypem.com/serve/source/%s/%s" % (track_id, key) + request = compat_urllib_request.Request( + serve_url, '', {'Content-Type': 'application/json'}) request.add_header('cookie', cookie) - song_data_json = self._download_webpage(request, track_id, u'Downloading metadata') - try: - song_data = json.loads(song_data_json) - except ValueError: - raise ExtractorError(u'Hypemachine contained invalid JSON.') - final_url = song_data[u"url"] + song_data = self._download_json(request, track_id, 'Downloading metadata') + final_url = song_data["url"] - return [{ - 'id': track_id, - 'url': final_url, - 'ext': "mp3", - 'title': title, - 'artist': artist, - }] + return { + 'id': track_id, + 'url': final_url, + 'ext': 'mp3', + 'title': title, + 'uploader': artist, + } From 826ec77fb2843357601e795e812fe04bfd6990d1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 9 Jun 2014 23:06:25 +0200 Subject: [PATCH 090/340] [Vulture] Add support for vulture.com --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/generic.py | 8 ++++ youtube_dl/extractor/vulture.py | 69 ++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+) create mode 100644 youtube_dl/extractor/vulture.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 15a42ce44..45cc479e2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -332,6 +332,7 @@ from .viki import VikiIE from .vk import VKIE from .vube import VubeIE from .vuclip import VuClipIE +from .vulture import VultureIE from .washingtonpost import WashingtonPostIE from .wat import WatIE from .wdr import ( diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 34d55297c..3105b47ab 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -668,6 +668,14 @@ class GenericIE(InfoExtractor): url = unescapeHTML(mobj.group('url')) return self.url_result(url) + # Look for embedded vulture.com player + mobj = re.search( + r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"', + webpage) + if mobj is not None: + url = unescapeHTML(mobj.group('url')) + return self.url_result(url, ie='Vulture') + # Start with something easy: JW Player in SWFObject found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if not found: diff --git a/youtube_dl/extractor/vulture.py b/youtube_dl/extractor/vulture.py new file mode 100644 index 000000000..1eb24a3d6 --- /dev/null +++ b/youtube_dl/extractor/vulture.py @@ -0,0 +1,69 @@ +from __future__ import unicode_literals + +import json +import os.path +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, +) + + +class VultureIE(InfoExtractor): + IE_NAME = 'vulture.com' + _VALID_URL = r'https?://video\.vulture\.com/video/(?P<display_id>[^/]+)/' + _TEST = { + 'url': 'http://video.vulture.com/video/Mindy-Kaling-s-Harvard-Speech/player?layout=compact&read_more=1', + 'md5': '8d997845642a2b5152820f7257871bc8', + 'info_dict': { + 'id': '6GHRQL3RV7MSD1H4', + 'ext': 'mp4', + 'title': 'kaling-speech-2-MAGNIFY STANDARD CONTAINER REVISED', + 'uploader_id': 'Sarah', + 'thumbnail': 're:^http://.*\.jpg$', + 'timestamp': 1401288564, + 'upload_date': '20140528', + 'description': 'Uplifting and witty, as predicted.', + 'duration': 1015, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) + query_string = self._search_regex( + r"queryString\s*=\s*'([^']+)'", webpage, 'query string') + video_id = self._search_regex( + r'content=([^&]+)', query_string, 'video ID') + query_url = 'http://video.vulture.com/embed/player/container/1000/1000/?%s' % query_string + + query_webpage = self._download_webpage( + query_url, display_id, note='Downloading query page') + params_json = self._search_regex( + r'(?sm)new MagnifyEmbeddablePlayer\({.*?contentItem:\s*(\{.*?\})\n,\n', + query_webpage, + 'player params') + params = json.loads(params_json) + + upload_timestamp = parse_iso8601(params['posted'].replace(' ', 'T')) + uploader_id = params.get('user', {}).get('handle') + + media_item = params['media_item'] + title = os.path.splitext(media_item['title'])[0] + duration = int_or_none(media_item.get('duration_seconds')) + + return { + 'id': video_id, + 'display_id': display_id, + 'url': media_item['pipeline_xid'], + 'title': title, + 'timestamp': upload_timestamp, + 'thumbnail': params.get('thumbnail_url'), + 'uploader_id': uploader_id, + 'description': params.get('description'), + 'duration': duration, + } From d5e944359e269f0b478595329872e3277f651c36 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 9 Jun 2014 23:14:04 +0200 Subject: [PATCH 091/340] Remove unused import --- youtube_dl/extractor/hypem.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/hypem.py b/youtube_dl/extractor/hypem.py index 6a95da900..6d0d847c6 100644 --- a/youtube_dl/extractor/hypem.py +++ b/youtube_dl/extractor/hypem.py @@ -6,7 +6,6 @@ import time from .common import InfoExtractor from ..utils import ( - compat_str, compat_urllib_parse, compat_urllib_request, From 9706f3f802e5ab079459b5dda6f4b711f39f5fd3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 9 Jun 2014 23:16:37 +0200 Subject: [PATCH 092/340] release 2014.06.09 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 6fe7c7b25..0c9dd6895 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.07' +__version__ = '2014.06.09' From 3141feb73bb3c3a6d89e86f17859e8b4beab8a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 10 Jun 2014 19:37:38 +0700 Subject: [PATCH 093/340] [ndtv] Fix title extraction and modernize --- youtube_dl/extractor/ndtv.py | 46 ++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dl/extractor/ndtv.py index d81df3c10..95e7d63aa 100644 --- a/youtube_dl/extractor/ndtv.py +++ b/youtube_dl/extractor/ndtv.py @@ -1,22 +1,28 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor -from ..utils import month_by_name +from ..utils import ( + month_by_name, + int_or_none, +) class NDTVIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)' _TEST = { - u"url": u"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710", - u"file": u"300710.mp4", - u"md5": u"39f992dbe5fb531c395d8bbedb1e5e88", - u"info_dict": { - u"title": u"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal", - u"description": u"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party.", - u"upload_date": u"20131208", - u"duration": 1327, - u"thumbnail": u"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg", + 'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710', + 'md5': '39f992dbe5fb531c395d8bbedb1e5e88', + 'info_dict': { + 'id': '300710', + 'ext': 'mp4', + 'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal", + 'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02', + 'upload_date': '20131208', + 'duration': 1327, + 'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg', }, } @@ -27,13 +33,12 @@ class NDTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) filename = self._search_regex( - r"__filename='([^']+)'", webpage, u'video filename') - video_url = (u'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % + r"__filename='([^']+)'", webpage, 'video filename') + video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' % filename) - duration_str = filename = self._search_regex( - r"__duration='([^']+)'", webpage, u'duration', fatal=False) - duration = None if duration_str is None else int(duration_str) + duration = int_or_none(self._search_regex( + r"__duration='([^']+)'", webpage, 'duration', fatal=False)) date_m = re.search(r'''(?x) <p\s+class="vod_dateline">\s* @@ -41,7 +46,7 @@ class NDTVIE(InfoExtractor): (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+) ''', webpage) upload_date = None - assert date_m + if date_m is not None: month = month_by_name(date_m.group('monthname')) if month is not None: @@ -49,14 +54,19 @@ class NDTVIE(InfoExtractor): date_m.group('year'), month, int(date_m.group('day'))) description = self._og_search_description(webpage) - READ_MORE = u' (Read more)' + READ_MORE = ' (Read more)' if description.endswith(READ_MORE): description = description[:-len(READ_MORE)] + title = self._og_search_title(webpage) + TITLE_SUFFIX = ' - NDTV' + if title.endswith(TITLE_SUFFIX): + title = title[:-len(TITLE_SUFFIX)] + return { 'id': video_id, 'url': video_url, - 'title': self._og_search_title(webpage), + 'title': title, 'description': description, 'thumbnail': self._og_search_thumbnail(webpage), 'duration': duration, From fdb9aebead76eb7673ea2867794e0020ac0dfd68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 11 Jun 2014 18:20:14 +0700 Subject: [PATCH 094/340] [tube8] Update test and modernize --- youtube_dl/extractor/tube8.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 36bc36ad8..08a48c05a 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -17,9 +17,10 @@ class Tube8IE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/){2}(?P<id>\d+)' _TEST = { 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', - 'file': '229795.mp4', - 'md5': 'e9e0b0c86734e5e3766e653509475db0', + 'md5': '44bf12b98313827dd52d35b8706a4ea0', 'info_dict': { + 'id': '229795', + 'ext': 'mp4', 'description': 'hot teen Kasia grinding', 'uploader': 'unknown', 'title': 'Kasia music video', From 8f93030c850a41f638ad2c5f48bbc9929bd38731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 11 Jun 2014 18:38:13 +0700 Subject: [PATCH 095/340] [blinkx] Modernize --- youtube_dl/extractor/blinkx.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 38ccd957f..7d558e262 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -4,9 +4,7 @@ import json import re from .common import InfoExtractor -from ..utils import ( - remove_start, -) +from ..utils import remove_start class BlinkxIE(InfoExtractor): @@ -15,9 +13,10 @@ class BlinkxIE(InfoExtractor): _TEST = { 'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', - 'file': '8aQUy7GV.mp4', 'md5': '2e9a07364af40163a908edbf10bb2492', 'info_dict': { + 'id': '8aQUy7GV', + 'ext': 'mp4', 'title': 'Police Car Rolls Away', 'uploader': 'stupidvideos.com', 'upload_date': '20131215', @@ -27,6 +26,7 @@ class BlinkxIE(InfoExtractor): 'thumbnails': [{ 'width': 100, 'height': 76, + 'resolution': '100x76', 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg', }], }, @@ -37,7 +37,7 @@ class BlinkxIE(InfoExtractor): video_id = m.group('id') display_id = video_id[:8] - api_url = (u'https://apib4.blinkx.com/api.php?action=play_video&' + + api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + 'video=%s' % video_id) data_json = self._download_webpage(api_url, display_id) data = json.loads(data_json)['api']['results'][0] @@ -55,13 +55,13 @@ class BlinkxIE(InfoExtractor): duration = m['d'] elif m['type'] == 'youtube': yt_id = m['link'] - self.to_screen(u'Youtube video detected: %s' % yt_id) + self.to_screen('Youtube video detected: %s' % yt_id) return self.url_result(yt_id, 'Youtube', video_id=yt_id) elif m['type'] in ('flv', 'mp4'): vcodec = remove_start(m['vcodec'], 'ff') acodec = remove_start(m['acodec'], 'ff') tbr = (int(m['vbr']) + int(m['abr'])) // 1000 - format_id = u'%s-%sk-%s' % (vcodec, tbr, m['w']) + format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) formats.append({ 'format_id': format_id, 'url': m['link'], From 4d2f143ce51c9374b570aca749a523569ec391d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 12 Jun 2014 20:33:53 +0700 Subject: [PATCH 096/340] [ted] Update test md5 --- youtube_dl/extractor/ted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d260c91c2..bce32a873 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -27,7 +27,7 @@ class TEDIE(SubtitlesInfoExtractor): ''' _TESTS = [{ 'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', - 'md5': '4ea1dada91e4174b53dac2bb8ace429d', + 'md5': 'fc94ac279feebbce69f21c0c6ee82810', 'info_dict': { 'id': '102', 'ext': 'mp4', From c5469e046a5483bc4e0136c07704d7bdfb0dc1ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 12 Jun 2014 20:42:46 +0700 Subject: [PATCH 097/340] [livestream] Modernize --- youtube_dl/extractor/livestream.py | 63 ++++++++++++++++-------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 1dcd1fb2d..5c71f4f09 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import re import json @@ -6,31 +8,34 @@ from ..utils import ( compat_urllib_parse_urlparse, compat_urlparse, xpath_with_ns, + compat_str, ) class LivestreamIE(InfoExtractor): - IE_NAME = u'livestream' + IE_NAME = 'livestream' _VALID_URL = r'http://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>\d+))?/?$' _TEST = { - u'url': u'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', - u'file': u'4719370.mp4', - u'md5': u'0d2186e3187d185a04b3cdd02b828836', - u'info_dict': { - u'title': u'Live from Webster Hall NYC', - u'upload_date': u'20121012', + 'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370', + 'md5': '53274c76ba7754fb0e8d072716f2292b', + 'info_dict': { + 'id': '4719370', + 'ext': 'mp4', + 'title': 'Live from Webster Hall NYC', + 'upload_date': '20121012', } } def _extract_video_info(self, video_data): video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url') - return {'id': video_data['id'], - 'url': video_url, - 'ext': 'mp4', - 'title': video_data['caption'], - 'thumbnail': video_data['thumbnail_url'], - 'upload_date': video_data['updated_at'].replace('-','')[:8], - } + return { + 'id': compat_str(video_data['id']), + 'url': video_url, + 'ext': 'mp4', + 'title': video_data['caption'], + 'thumbnail': video_data['thumbnail_url'], + 'upload_date': video_data['updated_at'].replace('-', '')[:8], + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -40,36 +45,36 @@ class LivestreamIE(InfoExtractor): if video_id is None: # This is an event page: - config_json = self._search_regex(r'window.config = ({.*?});', - webpage, u'window config') + config_json = self._search_regex( + r'window.config = ({.*?});', webpage, 'window config') info = json.loads(config_json)['event'] videos = [self._extract_video_info(video_data['data']) - for video_data in info['feed']['data'] if video_data['type'] == u'video'] + for video_data in info['feed']['data'] if video_data['type'] == 'video'] return self.playlist_result(videos, info['id'], info['full_name']) else: - og_video = self._og_search_video_url(webpage, name=u'player url') + og_video = self._og_search_video_url(webpage, 'player url') query_str = compat_urllib_parse_urlparse(og_video).query query = compat_urlparse.parse_qs(query_str) api_url = query['play_url'][0].replace('.smil', '') - info = json.loads(self._download_webpage(api_url, video_id, - u'Downloading video info')) + info = json.loads(self._download_webpage( + api_url, video_id, 'Downloading video info')) return self._extract_video_info(info) # The original version of Livestream uses a different system class LivestreamOriginalIE(InfoExtractor): - IE_NAME = u'livestream:original' + IE_NAME = 'livestream:original' _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' _TEST = { - u'url': u'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', - u'info_dict': { - u'id': u'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', - u'ext': u'flv', - u'title': u'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', + 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', + 'info_dict': { + 'id': 'pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', + 'ext': 'flv', + 'title': 'Spark 1 (BitCoin) with Cameron Winklevoss & Tyler Winklevoss of Winklevoss Capital', }, - u'params': { + 'params': { # rtmp - u'skip_download': True, + 'skip_download': True, }, } @@ -84,7 +89,7 @@ class LivestreamOriginalIE(InfoExtractor): ns = {'media': 'http://search.yahoo.com/mrss'} thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url'] # Remove the extension and number from the path (like 1.jpg) - path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, u'path') + path = self._search_regex(r'(user-files/.+)_.*?\.jpg$', thumbnail_url, 'path') return { 'id': video_id, From 0d933b2ad57563b70d725ce03fe0e79c4d84c99e Mon Sep 17 00:00:00 2001 From: Ariset Llerena <irtusb@gmail.com> Date: Thu, 12 Jun 2014 03:27:23 -0400 Subject: [PATCH 098/340] Added vimple.ru support --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vimple.py | 73 ++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 youtube_dl/extractor/vimple.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 45cc479e2..4b7900b4f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -324,6 +324,7 @@ from .vimeo import ( VimeoReviewIE, VimeoWatchLaterIE, ) +from .vimple import VimpleIE from .vine import ( VineIE, VineUserIE, diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py new file mode 100644 index 000000000..0f69e7126 --- /dev/null +++ b/youtube_dl/extractor/vimple.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals +import re, zlib, base64 +import xml.etree.ElementTree + +from .common import InfoExtractor + +class VimpleIE(InfoExtractor): + IE_DESC = 'Vimple.ru' + _VALID_URL = r'https?://player.vimple.ru/iframe/(?P<id>[a-f0-9]+)' + _TESTS = [ + { + # Quality: Large, from iframe + 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', + 'info_dict': { + 'id': 'b132bdfd71b546d3972f9ab9a25f201c', + 'title': 'great-escape-minecraft.flv', + 'ext':'mp4', + 'duration': 352, + 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', + }, + } + ] + + #http://jsunpack-n.googlecode.com/svn-history/r63/trunk/swf.py + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id + + iframe = self._download_webpage(iframe_url, video_id, note='Downloading iframe', errnote='unable to fetch iframe') + player_url = self._html_search_regex(r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') + + player = self._request_webpage(player_url, video_id, note='Downloading swf player').read() + + #http://stackoverflow.com/a/6804758 + #http://stackoverflow.com/a/12073686 + player = zlib.decompress(player[8:]) + + + xml_pieces = re.findall(b'([a-zA-Z0-9 =\\+/]{500})', player) + xml_pieces = [piece[1:-1] for piece in xml_pieces] + + xml_data = b''.join(xml_pieces) + xml_data = base64.b64decode(xml_data) + + xml_data = xml.etree.ElementTree.fromstring(xml_data) + + video = xml_data.find('Video') + quality = video.get('quality') + q_tag = video.find(quality.capitalize()) + + formats = [ + { + 'url': q_tag.get('url'), + 'tbr': int(q_tag.get('bitrate')), + 'filesize': int(q_tag.get('filesize')), + 'format_id': quality, + }, + ] + + return { + 'id': video_id, + 'title': video.find('Title').text, + 'formats': formats, + 'thumbnail': video.find('Poster').get('url'), + 'duration': int(video.get('duration')), + 'webpage_url': video.find('Share').get('videoPageUrl'), + } + + From cb437dc2ad41122f1a08595a0829b4d929ddd580 Mon Sep 17 00:00:00 2001 From: Ariset Llerena <irtusb@gmail.com> Date: Thu, 12 Jun 2014 22:33:50 -0400 Subject: [PATCH 099/340] removed extra char in regexp --- youtube_dl/extractor/vimple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 0f69e7126..a2f93afe3 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -40,7 +40,7 @@ class VimpleIE(InfoExtractor): player = zlib.decompress(player[8:]) - xml_pieces = re.findall(b'([a-zA-Z0-9 =\\+/]{500})', player) + xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) xml_pieces = [piece[1:-1] for piece in xml_pieces] xml_data = b''.join(xml_pieces) From e66ab17a3683bee57482ccce8f6b0a632f03d78e Mon Sep 17 00:00:00 2001 From: Ariset Llerena <irtusb@gmail.com> Date: Thu, 12 Jun 2014 23:08:06 -0400 Subject: [PATCH 100/340] Verified with pep8 and pyflakes --- youtube_dl/extractor/vimple.py | 49 +++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index a2f93afe3..f3a807cd3 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -1,53 +1,66 @@ # coding: utf-8 from __future__ import unicode_literals -import re, zlib, base64 -import xml.etree.ElementTree +import re +import zlib +import base64 +import xml.etree.ElementTree from .common import InfoExtractor + class VimpleIE(InfoExtractor): IE_DESC = 'Vimple.ru' - _VALID_URL = r'https?://player.vimple.ru/iframe/(?P<id>[a-f0-9]+)' + _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' _TESTS = [ + # Quality: Large, from iframe { - # Quality: Large, from iframe 'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', 'info_dict': { 'id': 'b132bdfd71b546d3972f9ab9a25f201c', 'title': 'great-escape-minecraft.flv', - 'ext':'mp4', + 'ext': 'mp4', 'duration': 352, 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', - }, - } + }, + }, + # Quality: Medium, from mainpage + { + 'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', + 'info_dict': { + 'id': 'a15950562888453b8e6f9572dc8600cd', + 'title': 'DB 01', + 'ext': 'flv', + 'duration': 1484, + 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', + } + }, ] - - #http://jsunpack-n.googlecode.com/svn-history/r63/trunk/swf.py + + # http://jsunpack-n.googlecode.com/svn-history/r63/trunk/swf.py def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - + iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id iframe = self._download_webpage(iframe_url, video_id, note='Downloading iframe', errnote='unable to fetch iframe') player_url = self._html_search_regex(r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') - + player = self._request_webpage(player_url, video_id, note='Downloading swf player').read() - #http://stackoverflow.com/a/6804758 - #http://stackoverflow.com/a/12073686 + # http://stackoverflow.com/a/6804758 + # http://stackoverflow.com/a/12073686 player = zlib.decompress(player[8:]) - xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) xml_pieces = [piece[1:-1] for piece in xml_pieces] - + xml_data = b''.join(xml_pieces) xml_data = base64.b64decode(xml_data) - + xml_data = xml.etree.ElementTree.fromstring(xml_data) - + video = xml_data.find('Video') quality = video.get('quality') q_tag = video.find(quality.capitalize()) @@ -69,5 +82,3 @@ class VimpleIE(InfoExtractor): 'duration': int(video.get('duration')), 'webpage_url': video.find('Share').get('videoPageUrl'), } - - From 0c361c41b895147372ecd18d31dfa1c0d3f7b6a4 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Fri, 13 Jun 2014 08:51:35 +0200 Subject: [PATCH 101/340] [WrzutaIE] Add extractor for wrzuta.pl (fixes #3072) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/wrzuta.py | 80 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 youtube_dl/extractor/wrzuta.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 45cc479e2..270925677 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -344,6 +344,7 @@ from .weibo import WeiboIE from .wimp import WimpIE from .wistia import WistiaIE from .worldstarhiphop import WorldStarHipHopIE +from .wrzuta import WrzutaIE from .xbef import XBefIE from .xhamster import XHamsterIE from .xnxx import XNXXIE diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py new file mode 100644 index 000000000..cfa76a6d2 --- /dev/null +++ b/youtube_dl/extractor/wrzuta.py @@ -0,0 +1,80 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + qualities, +) + + +class WrzutaIE(InfoExtractor): + IE_NAME = 'wrzuta.pl' + + _VALID_URL = r'https?://(?P<uploader>[0-9a-zA-Z]+)\.wrzuta\.pl/(?P<typ>film|audio)/(?P<id>[0-9a-zA-Z]+)' + + _TESTS = [{ + 'url': 'http://laboratoriumdextera.wrzuta.pl/film/aq4hIZWrkBu/nike_football_the_last_game', + 'md5': '9e67e05bed7c03b82488d87233a9efe7', + 'info_dict': { + 'id': 'aq4hIZWrkBu', + 'ext': 'mp4', + 'title': 'Nike Football: The Last Game', + 'duration': 307, + 'uploader_id': 'laboratoriumdextera', + 'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', + }, + }, { + 'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad', + 'md5': '1e546a18e1c22ac6e9adce17b8961ff5', + 'info_dict': { + 'id': '9oXJqdcndqv', + 'ext': 'ogg', + 'title': 'David Guetta & Showtek ft. Vassy - Bad', + 'duration': 270, + 'uploader_id': 'w729', + 'description': 'md5:4628f01c666bbaaecefa83476cfa794a', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + typ = mobj.group('typ') + uploader = mobj.group('uploader') + + webpage = self._download_webpage(url, video_id) + + quality = qualities(['SD', 'MQ', 'HQ', 'HD']) + + audio_table = {'flv': 'mp3', 'webm': 'ogg'} + + embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id) + + formats = [] + for media in embedpage['url']: + if typ == 'audio': + ext = audio_table[media['type'].split('@')[0]] + else: + ext = media['type'].split('@')[0] + + formats.append({ + 'format_id': '%s_%s' % (ext, media['quality'].lower()), + 'url': media['url'], + 'ext': ext, + 'quality': quality(media['quality']), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'duration': int_or_none(embedpage['duration']), + 'uploader_id': uploader, + 'description': self._og_search_description(webpage), + } From b0adbe98fb299a06c25f92d945b86438c1b9b0f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 13 Jun 2014 23:44:44 +0700 Subject: [PATCH 102/340] [rai] Add support for Rai websites (Closes #2930) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/rai.py | 120 +++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 youtube_dl/extractor/rai.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 45cc479e2..d2c679183 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -216,6 +216,7 @@ from .pornotube import PornotubeIE from .prosiebensat1 import ProSiebenSat1IE from .pyvideo import PyvideoIE from .radiofrance import RadioFranceIE +from .rai import RaiIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE from .ringtv import RingTVIE diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py new file mode 100644 index 000000000..74f13a751 --- /dev/null +++ b/youtube_dl/extractor/rai.py @@ -0,0 +1,120 @@ +from __future__ import unicode_literals + +import re + +from .subtitles import SubtitlesInfoExtractor +from ..utils import ( + parse_duration, + unified_strdate, + compat_urllib_parse, +) + + +class RaiIE(SubtitlesInfoExtractor): + _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' + _TESTS = [ + { + 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html', + 'md5': 'c064c0b2d09c278fb293116ef5d0a32d', + 'info_dict': { + 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', + 'ext': 'mp4', + 'title': 'Report del 07/04/2014', + 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', + 'upload_date': '20140407', + 'duration': 6160, + } + }, + { + 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', + 'md5': '8bb9c151924ce241b74dd52ef29ceafa', + 'info_dict': { + 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', + 'ext': 'mp4', + 'title': 'TG PRIMO TEMPO', + 'description': '', + 'upload_date': '20140612', + 'duration': 1758, + } + }, + { + 'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', + 'md5': '35cf7c229f22eeef43e48b5cf923bef0', + 'info_dict': { + 'id': '7aafdea9-0e5d-49d5-88a6-7e65da67ae13', + 'ext': 'mp4', + 'title': 'State of the Net, Antonella La Carpia: regole virali', + 'description': 'md5:b0ba04a324126903e3da7763272ae63c', + 'upload_date': '20140613', + } + }, + { + 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', + 'md5': '35694f062977fe6619943f08ed935730', + 'info_dict': { + 'id': 'b4a49761-e0cc-4b14-8736-2729f6f73132', + 'ext': 'mp4', + 'title': 'Alluvione in Sardegna e dissesto idrogeologico', + 'description': 'Edizione delle ore 20:30 ', + } + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + media = self._download_json('%s?json' % mobj.group('url'), video_id, 'Downloading video JSON') + + title = media.get('name') + description = media.get('desc') + thumbnail = media.get('image_300') or media.get('image_medium') or media.get('image') + duration = parse_duration(media.get('length')) + uploader = media.get('author') + upload_date = unified_strdate(media.get('date')) + + formats = [] + + for format_id in ['wmv', 'm3u8', 'mediaUri', 'h264']: + media_url = media.get(format_id) + if not media_url: + continue + formats.append({ + 'url': media_url, + 'format_id': format_id, + 'ext': 'mp4', + }) + + if self._downloader.params.get('listsubtitles', False): + page = self._download_webpage(url, video_id) + self._list_available_subtitles(video_id, page) + return + + subtitles = {} + if self._have_to_download_any_subtitles: + page = self._download_webpage(url, video_id) + subtitles = self.extract_subtitles(video_id, page) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'upload_date': upload_date, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + } + + def _get_available_subtitles(self, video_id, webpage): + subtitles = {} + m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage) + if m: + captions = m.group('captions') + STL_EXT = '.stl' + SRT_EXT = '.srt' + if captions.endswith(STL_EXT): + captions = captions[:-len(STL_EXT)] + SRT_EXT + subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions) + return subtitles \ No newline at end of file From a7207cd58038b489bea96397b20b536cfc7a5dac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 14 Jun 2014 17:00:59 +0700 Subject: [PATCH 103/340] [wrzuta] Add age limit --- youtube_dl/extractor/wrzuta.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/wrzuta.py b/youtube_dl/extractor/wrzuta.py index cfa76a6d2..34dd6d952 100644 --- a/youtube_dl/extractor/wrzuta.py +++ b/youtube_dl/extractor/wrzuta.py @@ -77,4 +77,5 @@ class WrzutaIE(InfoExtractor): 'duration': int_or_none(embedpage['duration']), 'uploader_id': uploader, 'description': self._og_search_description(webpage), + 'age_limit': embedpage.get('minimalAge', 0), } From 7d568f5ab894468f36fb046adc1c6b7a178ec132 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Sat, 14 Jun 2014 13:23:28 +0200 Subject: [PATCH 104/340] [Youtube] Recognize playlists with LL --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7c50881c4..ece62dfce 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1386,13 +1386,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | p/ ) ( - (?:PL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} + (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} # Top tracks, they can also include dots |(?:MC)[\w\.]* ) .* | - ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) + ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) )""" _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' _MORE_PAGES_INDICATOR = r'data-link-type="next"' From a013eba65f22fe45a7a71a6d3c81125f1004a3ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 15 Jun 2014 11:08:24 +0200 Subject: [PATCH 105/340] [brightcove] Improve the 'experienceJSON' regex (#3081) One of the strings may contain ';', we would get an invalid json string. --- youtube_dl/extractor/brightcove.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 3c02c297a..6b98bd278 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -187,7 +187,7 @@ class BrightcoveIE(InfoExtractor): webpage = self._download_webpage(req, video_id) self.report_extraction(video_id) - info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json') + info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json') info = json.loads(info)['data'] video_info = info['programmedContent']['videoPlayer']['mediaDTO'] video_info['_youtubedl_adServerURL'] = info.get('adServerURL') From 5524b242a7ee95e26fa5eaf2939571005b9d6c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 15 Jun 2014 11:20:40 +0200 Subject: [PATCH 106/340] [brightcove] Add support for renditions with 'remote' set to True (fixes #3081) The url needs to be modified to get the flv video. --- youtube_dl/extractor/brightcove.py | 38 +++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 6b98bd278..b550fad25 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -15,6 +15,7 @@ from ..utils import ( compat_urllib_request, compat_parse_qs, + determine_ext, ExtractorError, unsmuggle_url, unescapeHTML, @@ -70,7 +71,20 @@ class BrightcoveIE(InfoExtractor): 'description': 'md5:363109c02998fee92ec02211bd8000df', 'uploader': 'National Ballet of Canada', }, - } + }, + { + # test flv videos served by akamaihd.net + # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', + # The md5 checksum changes on each download + 'info_dict': { + 'id': '2996102916001', + 'ext': 'flv', + 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + 'uploader': 'Red Bull TV', + 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + }, + }, ] @classmethod @@ -220,11 +234,23 @@ class BrightcoveIE(InfoExtractor): renditions = video_info.get('renditions') if renditions: renditions = sorted(renditions, key=lambda r: r['size']) - info['formats'] = [{ - 'url': rend['defaultURL'], - 'height': rend.get('frameHeight'), - 'width': rend.get('frameWidth'), - } for rend in renditions] + formats = [] + for rend in renditions: + url = rend['defaultURL'] + if rend['remote']: + # This type of renditions are served through akamaihd.net, + # but they don't use f4m manifests + url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' + ext = 'flv' + else: + ext = determine_ext(url) + formats.append({ + 'url': url, + 'ext': ext, + 'height': rend.get('frameHeight'), + 'width': rend.get('frameWidth'), + }) + info['formats'] = formats elif video_info.get('FLVFullLengthURL') is not None: info.update({ 'url': video_info['FLVFullLengthURL'], From 96bef88f5f0eea0b2c5410a6cbb65cc820d72b30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 15 Jun 2014 11:24:05 +0200 Subject: [PATCH 107/340] [brightcove] Modernize some tests --- youtube_dl/extractor/brightcove.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index b550fad25..acf8b2dc2 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -30,10 +30,11 @@ class BrightcoveIE(InfoExtractor): { # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', - 'file': '2371591881001.mp4', 'md5': '5423e113865d26e40624dce2e4b45d95', 'note': 'Test Brightcove downloads and detection in GenericIE', 'info_dict': { + 'id': '2371591881001', + 'ext': 'mp4', 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', 'uploader': '8TV', 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', @@ -42,8 +43,9 @@ class BrightcoveIE(InfoExtractor): { # From http://medianetwork.oracle.com/video/player/1785452137001 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', - 'file': '1785452137001.flv', 'info_dict': { + 'id': '1785452137001', + 'ext': 'flv', 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', 'uploader': 'Oracle', From 35eacd0dae3b6266d379bb511d0ac321f401fba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 15 Jun 2014 11:37:39 +0200 Subject: [PATCH 108/340] [brightcove] Set the filesize of the formats and use _sort_formats --- youtube_dl/extractor/brightcove.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index acf8b2dc2..419951b62 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -235,7 +235,6 @@ class BrightcoveIE(InfoExtractor): renditions = video_info.get('renditions') if renditions: - renditions = sorted(renditions, key=lambda r: r['size']) formats = [] for rend in renditions: url = rend['defaultURL'] @@ -246,12 +245,15 @@ class BrightcoveIE(InfoExtractor): ext = 'flv' else: ext = determine_ext(url) + size = rend.get('size') formats.append({ 'url': url, 'ext': ext, 'height': rend.get('frameHeight'), 'width': rend.get('frameWidth'), + 'filesize': size if size != 0 else None, }) + self._sort_formats(formats) info['formats'] = formats elif video_info.get('FLVFullLengthURL') is not None: info.update({ From 33bf9033e0302aea4319c7d89ee6d3b830745216 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 16 Jun 2014 10:15:24 +0200 Subject: [PATCH 109/340] release 2014.06.16 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0c9dd6895..56d5d0f2c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.09' +__version__ = '2014.06.16' From 2371053565787dc833b04a6d8a45730d61ae7074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 16 Jun 2014 18:50:15 +0700 Subject: [PATCH 110/340] [rai] Skip test --- youtube_dl/extractor/rai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index 74f13a751..cb4305349 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -46,7 +46,8 @@ class RaiIE(SubtitlesInfoExtractor): 'title': 'State of the Net, Antonella La Carpia: regole virali', 'description': 'md5:b0ba04a324126903e3da7763272ae63c', 'upload_date': '20140613', - } + }, + 'skip': 'Error 404', }, { 'url': 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-b4a49761-e0cc-4b14-8736-2729f6f73132-tg2.html', From e5da4021eb75b0ea409d7ca1d8ec4b5f585ce762 Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Mon, 16 Jun 2014 16:17:49 +0200 Subject: [PATCH 111/340] [ARDIE] fix formats extraction (fixes #3087) --- youtube_dl/extractor/ard.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index c6d22c029..de8811f23 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -56,7 +56,19 @@ class ARDIE(InfoExtractor): raise ExtractorError('This video is only available after 20:00') formats = [] + for s in streams: + if type(s['_stream']) == list: + reverse = s['_stream'][::-1] + for i in reverse: + quality = s['_quality'] + reverse.index(i) + formats.append({ + 'quality': quality, + 'url': i, + 'format_id': '%s-%s' % (determine_ext(i), quality) + }) + continue + format = { 'quality': s['_quality'], 'url': s['_stream'], From 895ce482b1f3732a5f96014957dac84ec6ca069f Mon Sep 17 00:00:00 2001 From: pulpe <Pulpan3@gmail.com> Date: Mon, 16 Jun 2014 18:14:58 +0200 Subject: [PATCH 112/340] [ARDIE] adjustments suggested by @jaimeMF --- youtube_dl/extractor/ard.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index de8811f23..b36a4d46a 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -59,13 +59,12 @@ class ARDIE(InfoExtractor): for s in streams: if type(s['_stream']) == list: - reverse = s['_stream'][::-1] - for i in reverse: - quality = s['_quality'] + reverse.index(i) + for index, url in enumerate(s['_stream'][::-1]): + quality = s['_quality'] + index formats.append({ 'quality': quality, - 'url': i, - 'format_id': '%s-%s' % (determine_ext(i), quality) + 'url': url, + 'format_id': '%s-%s' % (determine_ext(url), quality) }) continue From e6c9f80c4814de3e2aa60394d08e0d0bcf52de4e Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden <hildenae@gmail.com> Date: Mon, 16 Jun 2014 19:29:23 +0200 Subject: [PATCH 113/340] tv.nrk.no urls mostly contain capital characters Updated regexp and one of the test cases to reflect this. tv.nrksuper.no mostly uses lowercase, so that is still there. --- youtube_dl/extractor/nrk.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 1f066cf05..527e431d7 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -72,11 +72,11 @@ class NRKIE(InfoExtractor): class NRKTVIE(InfoExtractor): - _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-z]{4}\d{8})' + _VALID_URL = r'http://tv\.nrk(?:super)?\.no/(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})' _TESTS = [ { - 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/muhh48000314/23-05-2014', + 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'md5': '7b96112fbae1faf09a6f9ae1aff6cb84', 'info_dict': { 'id': 'muhh48000314', @@ -141,4 +141,4 @@ class NRKTVIE(InfoExtractor): 'upload_date': upload_date, 'duration': duration, 'formats': formats, - } \ No newline at end of file + } From 70126312570805b09efcbb511d6d049b8de30c01 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden <hildenae@gmail.com> Date: Mon, 16 Jun 2014 19:37:59 +0200 Subject: [PATCH 114/340] Fix test Didn't use .lower() as planned, so update test with new ID. --- youtube_dl/extractor/nrk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 527e431d7..96f0ae1eb 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -79,7 +79,7 @@ class NRKTVIE(InfoExtractor): 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 'md5': '7b96112fbae1faf09a6f9ae1aff6cb84', 'info_dict': { - 'id': 'muhh48000314', + 'id': 'MUHH48000314', 'ext': 'flv', 'title': '20 spørsmål', 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', From 5f28a1acad2f0703b4e3c345ed934202e456790d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= <Pulpan3@gmail.com> Date: Tue, 17 Jun 2014 15:18:46 +0200 Subject: [PATCH 115/340] [GorillaVid] improve extractor --- youtube_dl/extractor/gorillavid.py | 68 +++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 7e8b9f706..aa15cafc3 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -1,38 +1,76 @@ -# coding: utf-8 +# -*- coding: utf-8 -*- from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + determine_ext, + compat_urllib_parse, + compat_urllib_request, +) + class GorillaVidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www.)?gorillavid.in/(?:embed-)?(?P<id>\w+)(?:\-\d+x\d+)?(?:.html)?' - _TEST = { - 'url': "http://gorillavid.in/z08zf8le23c6", + _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' + + _TESTS = [{ + 'url': 'http://gorillavid.in/06y9juieqpmi', + 'md5': '5ae4a3580620380619678ee4875893ba', + 'info_dict': { + 'id': '06y9juieqpmi', + 'ext': 'flv', + 'title': 'Rebecca Black My Moment Official Music Video Reaction', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html', 'md5': 'c9e293ca74d46cad638e199c3f3fe604', 'info_dict': { 'id': 'z08zf8le23c6', 'ext': 'mp4', 'title': 'Say something nice', - } - } + 'thumbnail': 're:http://.*\.jpg', + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + url = 'http://gorillavid.in/%s' % video_id + webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r"name=['\"]fname['\"]\s+value=['\"](.*?)['\"]", webpage, u"video title") - # download embed page again with cookies to get url - embed_url = "http://gorillavid.in/embed-{0}-960x480.html".format(video_id) - webpage = self._download_webpage(embed_url, video_id, note=u'Downloading webpage again (with cookie)') - url = self._html_search_regex(r'file:\s+["\'](http://.*?video.\w{3})["\']', webpage, url) + fields = dict(re.findall(r'''(?x)<input\s+ + type="hidden"\s+ + name="([^"]+)"\s+ + (?:id="[^"]+"\s+)? + value="([^"]*)" + ''', webpage)) + + if fields['op'] == 'download1': + post = compat_urllib_parse.urlencode(fields) - info_dict = { + req = compat_urllib_request.Request(url, post) + req.add_header('Content-type', 'application/x-www-form-urlencoded') + + webpage = self._download_webpage(req, video_id, 'Downloading video page') + + title = self._search_regex(r'style="z-index: [0-9]+;">([0-9a-zA-Z ]+)(?:-.+)?</span>', webpage, 'title') + thumbnail = self._search_regex(r'image:\'(http[^\']+)\',', webpage, 'thumbnail') + url = self._search_regex(r'file: \'(http[^\']+)\',', webpage, 'file url') + + formats = [{ + 'format_id': 'sd', + 'url': url, + 'ext': determine_ext(url), + 'quality': 1, + }] + + return { 'id': video_id, 'title': title, - 'url': url, + 'thumbnail': thumbnail, + 'formats': formats, } - - return info_dict From a14e1538fe66c49ca8869681d2bbe60a36bd420d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= <Pulpan3@gmail.com> Date: Tue, 17 Jun 2014 16:03:03 +0200 Subject: [PATCH 116/340] [ustream:channel] replace test for an updated channel --- test/test_playlists.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 465b07b9e..ee91e412a 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -114,10 +114,10 @@ class TestPlaylists(unittest.TestCase): def test_ustream_channel(self): dl = FakeYDL() ie = UstreamChannelIE(dl) - result = ie.extract('http://www.ustream.tv/channel/young-americans-for-liberty') + result = ie.extract('http://www.ustream.tv/channel/channeljapan') self.assertIsPlaylist(result) - self.assertEqual(result['id'], '5124905') - self.assertTrue(len(result['entries']) >= 6) + self.assertEqual(result['id'], '10874166') + self.assertTrue(len(result['entries']) >= 54) def test_soundcloud_set(self): dl = FakeYDL() From def8b4039f85449eb8aa3e7ad51c706661a1fb75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 18 Jun 2014 18:53:25 +0700 Subject: [PATCH 117/340] [bilibili] Fix extraction --- youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 45067b944..0d5889f5d 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -13,7 +13,7 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): - _VALID_URL = r'http://www\.bilibili\.tv/video/av(?P<id>[0-9]+)/' + _VALID_URL = r'http://www\.bilibili\.(?:tv|com)/video/av(?P<id>[0-9]+)/' _TEST = { 'url': 'http://www.bilibili.tv/video/av1074402/', @@ -56,7 +56,7 @@ class BiliBiliIE(InfoExtractor): 'thumbnailUrl', video_code, 'thumbnail', fatal=False) player_params = compat_parse_qs(self._html_search_regex( - r'<iframe .*?class="player" src="https://secure.bilibili.tv/secure,([^"]+)"', + r'<iframe .*?class="player" src="https://secure\.bilibili\.(?:tv|com)/secure,([^"]+)"', webpage, 'player params')) if 'cid' in player_params: From 38a9339bafaee1ee12db3ba96352c0e3075762ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 19 Jun 2014 19:51:49 +0700 Subject: [PATCH 118/340] [prosiebensat1] Update some regexes --- youtube_dl/extractor/prosiebensat1.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index e4c4ad714..da64a1a7b 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -158,19 +158,19 @@ class ProSiebenSat1IE(InfoExtractor): _CLIPID_REGEXES = [ r'"clip_id"\s*:\s+"(\d+)"', r'clipid: "(\d+)"', - r'clipId=(\d+)', + r'clip[iI]d=(\d+)', ] _TITLE_REGEXES = [ r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', r'<header class="clearfix">\s*<h3>(.+?)</h3>', r'<!-- start video -->\s*<h1>(.+?)</h1>', - r'<div class="ep-femvideos-pi4-video-txt">\s*<h2>(.+?)</h2>', + r'<h1 class="att-name">\s*(.+?)</h1>', ] _DESCRIPTION_REGEXES = [ r'<p itemprop="description">\s*(.+?)</p>', r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>', r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', - r'<p>(.+?)</p>\s*<div class="ep-femvideos-pi4-video-footer">', + r'<p class="att-description">\s*(.+?)\s*</p>', ] _UPLOAD_DATE_REGEXES = [ r'<meta property="og:published_time" content="(.+?)">', From a23ba9b53ccd963012daf282ffa37ba90b607441 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= <Pulpan3@gmail.com> Date: Thu, 19 Jun 2014 16:32:11 +0200 Subject: [PATCH 119/340] [Steam] update description in test --- youtube_dl/extractor/steam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py index 1d8d57224..af689e2c2 100644 --- a/youtube_dl/extractor/steam.py +++ b/youtube_dl/extractor/steam.py @@ -53,7 +53,7 @@ class SteamIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20140329', 'title': 'FRONTIERS - Final Greenlight Trailer', - 'description': "The final trailer for the Steam Greenlight launch. Hooray, progress! Here's the official Greenlight page: http://steamcommunity.com/sharedfiles/filedetails/?id=242472205", + 'description': 'md5:6df4fe8dd494ae811869672b0767e025', 'uploader': 'AAD Productions', 'uploader_id': 'AtomicAgeDogGames', } From 896bf55352fd2cbd17f9ceb3a67186db51ec77df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= <Pulpan3@gmail.com> Date: Thu, 19 Jun 2014 16:34:48 +0200 Subject: [PATCH 120/340] [LifeNews] update thumbnail in test --- youtube_dl/extractor/lifenews.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dl/extractor/lifenews.py index 7a431a274..8d9491f23 100644 --- a/youtube_dl/extractor/lifenews.py +++ b/youtube_dl/extractor/lifenews.py @@ -24,7 +24,7 @@ class LifeNewsIE(InfoExtractor): 'ext': 'mp4', 'title': 'МВД разыскивает мужчин, оставивших в IKEA сумку с автоматом', 'description': 'Камеры наблюдения гипермаркета зафиксировали троих мужчин, спрятавших оружейный арсенал в камере хранения.', - 'thumbnail': 'http://lifenews.ru/static/posts/2014/1/126342/.video.jpg', + 'thumbnail': 're:http://.*\.jpg', 'upload_date': '20140130', } } From c26e9ac4b2ce104fed39d37f87b1b253c3346a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 19 Jun 2014 16:36:42 +0200 Subject: [PATCH 121/340] [youtube] Recognize signature functions that contain '$' (fixes #3104) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ece62dfce..d45545ee4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -440,7 +440,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _parse_sig_js(self, jscode): funcname = self._search_regex( - r'signature=([a-zA-Z]+)', jscode, + r'signature=([$a-zA-Z]+)', jscode, u'Initial JS player signature function name') jsi = JSInterpreter(jscode) From d763637f6a02ab1ac0241283be25d0653aac8c71 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 19 Jun 2014 17:13:50 +0200 Subject: [PATCH 122/340] release 2014.06.19 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 56d5d0f2c..a332b5a8e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.16' +__version__ = '2014.06.19' From 77245725196717dabd35f4163004a5cfa31db6b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 20 Jun 2014 18:40:47 +0700 Subject: [PATCH 123/340] [noco] Switch to HTTPS (Closes #3116) --- youtube_dl/extractor/noco.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/noco.py b/youtube_dl/extractor/noco.py index d451cd1bf..da203538d 100644 --- a/youtube_dl/extractor/noco.py +++ b/youtube_dl/extractor/noco.py @@ -35,7 +35,7 @@ class NocoIE(InfoExtractor): video_id = mobj.group('id') medias = self._download_json( - 'http://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') + 'https://api.noco.tv/1.0/video/medias/%s' % video_id, video_id, 'Downloading video JSON') formats = [] @@ -43,7 +43,7 @@ class NocoIE(InfoExtractor): format_id = fmt['quality_key'] file = self._download_json( - 'http://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), + 'https://api.noco.tv/1.0/video/file/%s/fr/%s' % (format_id.lower(), video_id), video_id, 'Downloading %s video JSON' % format_id) file_url = file['file'] @@ -71,7 +71,7 @@ class NocoIE(InfoExtractor): self._sort_formats(formats) show = self._download_json( - 'http://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] + 'https://api.noco.tv/1.0/shows/show/%s' % video_id, video_id, 'Downloading show JSON')[0] upload_date = unified_strdate(show['indexed']) uploader = show['partner_name'] From a20575e8aeafeb1aaf70243cce96505b661449e9 Mon Sep 17 00:00:00 2001 From: Elias Probst <mail@eliasprobst.eu> Date: Sat, 21 Jun 2014 00:35:12 +0200 Subject: [PATCH 124/340] Make debug message useful and also report, which URL failed to download. --- youtube_dl/extractor/spiegel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 9156d7faf..94346daf6 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -36,7 +36,7 @@ class SpiegelIE(InfoExtractor): xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml' idoc = self._download_xml( xml_url, video_id, - note='Downloading XML', errnote='Failed to download XML') + note='Downloading XML', errnote='Failed to download XML from "{0}"'.format(xml_url)) formats = [ { From 8bfb6723cb5bd9cb7a4c843e12688b26ab14d17a Mon Sep 17 00:00:00 2001 From: Elias Probst <mail@eliasprobst.eu> Date: Sat, 21 Jun 2014 01:00:48 +0200 Subject: [PATCH 125/340] Extract the base_url for the XML download from the JS snippet's 'server' variable. --- youtube_dl/extractor/spiegel.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 94346daf6..87f1ee694 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -33,7 +33,13 @@ class SpiegelIE(InfoExtractor): video_title = self._html_search_regex( r'<div class="module-title">(.*?)</div>', webpage, 'title') - xml_url = 'http://video2.spiegel.de/flash/' + video_id + '.xml' + base_url = self._search_regex( + r'var\s+server\s+=\s+\"(http://video\d*\.spiegel\.de/flash/\d+/\d+/)\";', + webpage, + 'base_url', + ) + + xml_url = base_url + video_id + '.xml' idoc = self._download_xml( xml_url, video_id, note='Downloading XML', errnote='Failed to download XML from "{0}"'.format(xml_url)) From 98aeac6ea9c6f554fa81f19cfa2a7b67b2d424f2 Mon Sep 17 00:00:00 2001 From: Elias Probst <mail@eliasprobst.eu> Date: Sat, 21 Jun 2014 01:10:10 +0200 Subject: [PATCH 126/340] Use the 'base_url' for building the resulting 'url' as well. --- youtube_dl/extractor/spiegel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 87f1ee694..bf5a5e2a4 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -47,7 +47,7 @@ class SpiegelIE(InfoExtractor): formats = [ { 'format_id': n.tag.rpartition('type')[2], - 'url': 'http://video2.spiegel.de/flash/' + n.find('./filename').text, + 'url': base_url + n.find('./filename').text, 'width': int(n.find('./width').text), 'height': int(n.find('./height').text), 'abr': int(n.find('./audiobitrate').text), From 55c97a03e133012f92ffd9b2070f72234924ae7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 21 Jun 2014 16:31:18 +0700 Subject: [PATCH 127/340] [spiegel] Add description and modernize --- youtube_dl/extractor/spiegel.py | 39 ++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index bf5a5e2a4..340a38440 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -1,3 +1,4 @@ +# encoding: utf-8 from __future__ import unicode_literals import re @@ -9,18 +10,33 @@ class SpiegelIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$' _TESTS = [{ 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', - 'file': '1259285.mp4', 'md5': '2c2754212136f35fb4b19767d242f66e', 'info_dict': { + 'id': '1259285', + 'ext': 'mp4', 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', + 'description': 'md5:8029d8310232196eb235d27575a8b9f4', + 'duration': 49, }, - }, - { + }, { 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', - 'file': '1309159.mp4', 'md5': 'f2cdf638d7aa47654e251e1aee360af1', 'info_dict': { + 'id': '1309159', + 'ext': 'mp4', 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', + 'description': 'md5:c2322b65e58f385a820c10fa03b2d088', + 'duration': 983, + }, + }, { + 'url': 'http://www.spiegel.de/video/johann-westhauser-videobotschaft-des-hoehlenforschers-video-1502367.html', + 'md5': '54f58ba0e752e3c07bc2a26222dd0acf', + 'info_dict': { + 'id': '1502367', + 'ext': 'mp4', + 'title': 'Videobotschaft: Höhlenforscher Westhauser dankt seinen Rettern', + 'description': 'md5:c6f1ec11413ebd1088b6813943e5fc91', + 'duration': 42, }, }] @@ -30,19 +46,15 @@ class SpiegelIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - video_title = self._html_search_regex( + title = self._html_search_regex( r'<div class="module-title">(.*?)</div>', webpage, 'title') + description = self._html_search_meta('description', webpage, 'description') base_url = self._search_regex( - r'var\s+server\s+=\s+\"(http://video\d*\.spiegel\.de/flash/\d+/\d+/)\";', - webpage, - 'base_url', - ) + r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL') xml_url = base_url + video_id + '.xml' - idoc = self._download_xml( - xml_url, video_id, - note='Downloading XML', errnote='Failed to download XML from "{0}"'.format(xml_url)) + idoc = self._download_xml(xml_url, video_id) formats = [ { @@ -65,7 +77,8 @@ class SpiegelIE(InfoExtractor): return { 'id': video_id, - 'title': video_title, + 'title': title, + 'description': description, 'duration': duration, 'formats': formats, } From 18061bbab046ef0c237b955bb86b7bed3aa97256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= <Pulpan3@gmail.com> Date: Sat, 21 Jun 2014 12:03:27 +0200 Subject: [PATCH 128/340] [Youtube] add DASH format 272 (fixes #3128) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d45545ee4..6bdea1c44 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -224,6 +224,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, # Dash webm audio '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 48, 'preference': -50}, From d2824416aa9cc43d92a44a820026378cae70ca9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Jun 2014 01:20:40 +0700 Subject: [PATCH 129/340] [firstpost] Fix title extraction and add description --- youtube_dl/extractor/firstpost.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/firstpost.py b/youtube_dl/extractor/firstpost.py index eccd8dde9..0993af1c9 100644 --- a/youtube_dl/extractor/firstpost.py +++ b/youtube_dl/extractor/firstpost.py @@ -15,6 +15,7 @@ class FirstpostIE(InfoExtractor): 'id': '1025403', 'ext': 'mp4', 'title': 'India to launch indigenous aircraft carrier INS Vikrant today', + 'description': 'md5:feef3041cb09724e0bdc02843348f5f4', } } @@ -22,13 +23,16 @@ class FirstpostIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + page = self._download_webpage(url, video_id) + title = self._html_search_meta('twitter:title', page, 'title') + description = self._html_search_meta('twitter:description', page, 'title') + data = self._download_xml( 'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id, 'Downloading video XML') item = data.find('./playlist/item') thumbnail = item.find('./image').text - title = item.find('./title').text formats = [ { @@ -42,6 +46,7 @@ class FirstpostIE(InfoExtractor): return { 'id': video_id, 'title': title, + 'description': description, 'thumbnail': thumbnail, 'formats': formats, } From 27ec04b232ad92442289911cb179f0d84002663c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= <Pulpan3@gmail.com> Date: Sun, 22 Jun 2014 17:33:27 +0200 Subject: [PATCH 130/340] [BR] replace test --- youtube_dl/extractor/br.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index b5b56ff00..993360714 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -17,15 +17,13 @@ class BRIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.br.de/mediathek/video/anselm-gruen-114.html', - 'md5': 'c4f83cf0f023ba5875aba0bf46860df2', + 'url': 'http://www.br.de/mediathek/video/sendungen/heimatsound/heimatsound-festival-2014-trailer-100.html', + 'md5': '93556dd2bcb2948d9259f8670c516d59', 'info_dict': { - 'id': '2c8d81c5-6fb7-4a74-88d4-e768e5856532', + 'id': '25e279aa-1ffd-40fd-9955-5325bd48a53a', 'ext': 'mp4', - 'title': 'Feiern und Verzichten', - 'description': 'Anselm Grün: Feiern und Verzichten', - 'uploader': 'BR/Birgit Baier', - 'upload_date': '20140301', + 'title': 'Am 1. und 2. August in Oberammergau', + 'description': 'md5:dfd224e5aa6819bc1fcbb7826a932021', } }, { From 8940c1c0587612d4de57f7c6edace6005a6de71d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 22 Jun 2014 19:19:26 +0200 Subject: [PATCH 131/340] [mtv] Add an extractor for the mtvservices embedded player (closes #2995) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mtv.py | 39 ++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index dcf64d034..c3160df1e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -170,6 +170,7 @@ from .moviezine import MoviezineIE from .movshare import MovShareIE from .mtv import ( MTVIE, + MTVServicesEmbeddedIE, MTVIggyIE, ) from .musicplayon import MusicPlayOnIE diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index e5ca41b40..af9490ccc 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -22,6 +22,7 @@ def _media_xml_tag(tag): class MTVServicesInfoExtractor(InfoExtractor): _MOBILE_TEMPLATE = None + @staticmethod def _id_from_uri(uri): return uri.split(':')[-1] @@ -35,6 +36,9 @@ class MTVServicesInfoExtractor(InfoExtractor): base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' return base + m.group('finalid') + def _get_feed_url(self, uri): + return self._FEED_URL + def _get_thumbnail_url(self, uri, itemdoc): search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail')) thumb_node = itemdoc.find(search_path) @@ -136,10 +140,10 @@ class MTVServicesInfoExtractor(InfoExtractor): def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) + feed_url = self._get_feed_url(uri) data = compat_urllib_parse.urlencode({'uri': uri}) - idoc = self._download_xml( - self._FEED_URL + '?' + data, video_id, + feed_url + '?' + data, video_id, 'Downloading info', transform_source=fix_xml_ampersands) return [self._get_video_info(item) for item in idoc.findall('.//item')] @@ -160,6 +164,37 @@ class MTVServicesInfoExtractor(InfoExtractor): return self._get_videos_info(mgid) +class MTVServicesEmbeddedIE(MTVServicesInfoExtractor): + IE_NAME = 'mtvservices:embedded' + _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)' + + _TEST = { + # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/ + 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906', + 'md5': 'cb349b21a7897164cede95bd7bf3fbb9', + 'info_dict': { + 'id': '1043906', + 'ext': 'mp4', + 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds', + 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.', + }, + } + + def _get_feed_url(self, uri): + video_id = self._id_from_uri(uri) + site_id = uri.replace(video_id, '') + config_url = 'http://media.mtvnservices.com/pmt/e1/players/{0}/config.xml'.format(site_id) + config_doc = self._download_xml(config_url, video_id) + feed_node = config_doc.find('.//feed') + feed_url = feed_node.text.strip().split('?')[0] + return feed_url + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + mgid = mobj.group('mgid') + return self._get_videos_info(mgid) + + class MTVIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)^https?:// (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$| From c5cd249e41a048d0766987aa6b33d49fa64a7d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 22 Jun 2014 21:38:04 +0200 Subject: [PATCH 132/340] [generic] Extract mtvservices embedded videos --- youtube_dl/extractor/generic.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 3105b47ab..9dd03aba4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -278,6 +278,17 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + # MTVSercices embed + { + 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too', + 'md5': '35727f82f58c76d996fc188f9755b0d5', + 'info_dict': { + 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9', + 'ext': 'mp4', + 'title': 'Review', + 'description': 'Mario\'s life in the fast lane has never looked so good.', + }, + }, ] def report_download_webpage(self, video_id): @@ -676,6 +687,14 @@ class GenericIE(InfoExtractor): url = unescapeHTML(mobj.group('url')) return self.url_result(url, ie='Vulture') + # Look for embedded mtvservices player + mobj = re.search( + r'<iframe src="(?P<url>https?://media\.mtvnservices\.com/embed/[^"]+)"', + webpage) + if mobj is not None: + url = unescapeHTML(mobj.group('url')) + return self.url_result(url, ie='MTVServicesEmbedded') + # Start with something easy: JW Player in SWFObject found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if not found: From 9a2dc4f7ac308725afd651b1f0f92291d1906496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Jun 2014 03:07:10 +0700 Subject: [PATCH 133/340] [teachertube] Fix extraction --- youtube_dl/extractor/teachertube.py | 31 +++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 4d9666c6b..b3cb6bd76 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -22,8 +22,8 @@ class TeacherTubeIE(InfoExtractor): 'info_dict': { 'id': '339997', 'ext': 'mp4', - 'title': 'Measures of dispersion from a frequency table_x264', - 'description': 'md5:a3e9853487185e9fcd7181a07164650b', + 'title': 'Measures of dispersion from a frequency table', + 'description': 'Measures of dispersion from a frequency table', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -33,7 +33,7 @@ class TeacherTubeIE(InfoExtractor): 'id': '340064', 'ext': 'mp4', 'title': 'How to Make Paper Dolls _ Paper Art Projects', - 'description': 'md5:2ca52b20cd727773d1dc418b3d6bd07b', + 'description': 'Learn how to make paper dolls in this simple', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -43,7 +43,7 @@ class TeacherTubeIE(InfoExtractor): 'id': '8805', 'ext': 'mp3', 'title': 'PER ASPERA AD ASTRA', - 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNIČKE ŠKOLE PER ASPERA AD ASTRA', + 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', }, }] @@ -53,9 +53,19 @@ class TeacherTubeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + title = self._html_search_meta('title', webpage, 'title') + TITLE_SUFFIX = ' - TeacherTube' + if title.endswith(TITLE_SUFFIX): + title = title[:-len(TITLE_SUFFIX)].strip() + + description = self._html_search_meta('description', webpage, 'description') + if description: + description = description.strip() + quality = qualities(['mp3', 'flv', 'mp4']) - _, media_urls = zip(*re.findall(r'([\'"])file\1\s*:\s*"([^"]+)"', webpage)) + media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage) + media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage)) formats = [ { @@ -68,10 +78,10 @@ class TeacherTubeIE(InfoExtractor): return { 'id': video_id, - 'title': self._og_search_title(webpage), - 'thumbnail': self._og_search_thumbnail(webpage), + 'title': title, + 'thumbnail': self._html_search_regex(r'var\s+thumbUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail'), 'formats': formats, - 'description': self._og_search_description(webpage), + 'description': description, } @@ -85,8 +95,9 @@ class TeacherTubeClassroomIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('user') - rss = self._download_xml('http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, - user_id, 'Downloading classroom RSS') + rss = self._download_xml( + 'http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, + user_id, 'Downloading classroom RSS') entries = [] for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'): From 7575d52a73e4e319351802243677aabddda0a504 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 24 Jun 2014 08:59:40 +0200 Subject: [PATCH 134/340] release 2014.06.24 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a332b5a8e..c91312629 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.19' +__version__ = '2014.06.24' From 36ddd8b3f7632f5ca4376a37bb5e0eb6aefb16d7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 24 Jun 2014 09:03:52 +0200 Subject: [PATCH 135/340] release 2014.06.24.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c91312629..77f6083d5 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.24' +__version__ = '2014.06.24.1' From 60b2dd1285a7aeb848e0f3c24f5e9480f9cef616 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 24 Jun 2014 10:50:41 +0200 Subject: [PATCH 136/340] [comedycentral] Correct handling when latest tds episode is a special-episode instead of a regular one --- youtube_dl/extractor/comedycentral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index ba4d73ab8..8af0abade 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -130,7 +130,7 @@ class ComedyCentralShowsIE(InfoExtractor): raise ExtractorError('Invalid redirected URL: ' + url) if mobj.group('episode') == '': raise ExtractorError('Redirected URL is still not specific: ' + url) - epTitle = mobj.group('episode').rpartition('/')[-1] + epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1] mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) if len(mMovieParams) == 0: From e423e0baaabe16e80af693d1f05ffc560747b3b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 24 Jun 2014 19:34:39 +0700 Subject: [PATCH 137/340] [wistia] Add duration and modernize --- youtube_dl/extractor/wistia.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py index bc31c2e64..e6bfa9e14 100644 --- a/youtube_dl/extractor/wistia.py +++ b/youtube_dl/extractor/wistia.py @@ -1,3 +1,5 @@ +from __future__ import unicode_literals + import json import re @@ -5,14 +7,16 @@ from .common import InfoExtractor class WistiaIE(InfoExtractor): - _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' + _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' _TEST = { - u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt", - u"file": u"sh7fpupwlt.mov", - u"md5": u"cafeb56ec0c53c18c97405eecb3133df", - u"info_dict": { - u"title": u"cfh_resourceful_zdkh_final_1" + 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', + 'md5': 'cafeb56ec0c53c18c97405eecb3133df', + 'info_dict': { + 'id': 'sh7fpupwlt', + 'ext': 'mov', + 'title': 'Being Resourceful', + 'duration': 117, }, } @@ -22,7 +26,7 @@ class WistiaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_json = self._html_search_regex( - r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data') + r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data') data = json.loads(data_json) @@ -54,4 +58,5 @@ class WistiaIE(InfoExtractor): 'title': data['name'], 'formats': formats, 'thumbnails': thumbnails, + 'duration': data.get('duration'), } From cea2582df28113e41d37189005c8a1ea12b0d38d Mon Sep 17 00:00:00 2001 From: Peter <vanderlaan.pm@gmail.com> Date: Tue, 24 Jun 2014 17:41:53 +0200 Subject: [PATCH 138/340] [discovery] Change default url URL does a redirect from dsc.discovery.com to www.discovery.com This commit fixes the correct URL. --- youtube_dl/extractor/discovery.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py index 2ae6ecc12..554df6735 100644 --- a/youtube_dl/extractor/discovery.py +++ b/youtube_dl/extractor/discovery.py @@ -7,9 +7,9 @@ from .common import InfoExtractor class DiscoveryIE(InfoExtractor): - _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' + _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' _TEST = { - 'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', + 'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', 'md5': 'e12614f9ee303a6ccef415cb0793eba2', 'info_dict': { 'id': '614784', From b7c33124c867a66bb1abfd56b5d3b9fe1e5dba2c Mon Sep 17 00:00:00 2001 From: Michael Smith <crazedpsyc@duckduckgo.com> Date: Tue, 24 Jun 2014 17:55:08 -0600 Subject: [PATCH 139/340] [BlipTV] Allow plus sign in video ID --- youtube_dl/extractor/bliptv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index d4da08991..acfc4ad73 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -15,7 +15,7 @@ from ..utils import ( class BlipTVIE(SubtitlesInfoExtractor): - _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))' + _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))' _TESTS = [ { From fd69098a45a85f84455140aa14c7384670eef572 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 25 Jun 2014 19:06:11 +0700 Subject: [PATCH 140/340] [rutube] Update playlist tests --- test/test_playlists.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index ee91e412a..42051fe2a 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -40,6 +40,7 @@ from youtube_dl.extractor import ( KhanAcademyIE, EveryonesMixtapeIE, RutubeChannelIE, + RutubePersonIE, GoogleSearchIE, GenericIE, TEDIE, @@ -256,10 +257,18 @@ class TestPlaylists(unittest.TestCase): def test_rutube_channel(self): dl = FakeYDL() ie = RutubeChannelIE(dl) - result = ie.extract('http://rutube.ru/tags/video/1409') + result = ie.extract('http://rutube.ru/tags/video/1800/') self.assertIsPlaylist(result) - self.assertEqual(result['id'], '1409') - self.assertTrue(len(result['entries']) >= 34) + self.assertEqual(result['id'], '1800') + self.assertTrue(len(result['entries']) >= 68) + + def test_rutube_person(self): + dl = FakeYDL() + ie = RutubePersonIE(dl) + result = ie.extract('http://rutube.ru/video/person/313878/') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '313878') + self.assertTrue(len(result['entries']) >= 37) def test_multiple_brightcove_videos(self): # https://github.com/rg3/youtube-dl/issues/2283 From 85342674b29e474226c8a137d841fcfe16f004f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= <Pulpan3@gmail.com> Date: Wed, 25 Jun 2014 17:44:19 +0200 Subject: [PATCH 141/340] [Dailymotion] fix uploader name (fixes #3153) --- youtube_dl/extractor/dailymotion.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 55216201f..5d0bfe454 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -150,7 +150,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): return { 'id': video_id, 'formats': formats, - 'uploader': info['owner_screenname'], + 'uploader': info['owner.screenname'], 'upload_date': video_upload_date, 'title': self._og_search_title(webpage), 'subtitles': video_subtitles, From 1044f8afd2156e16e2a0e8d6e656e6aab1f1306b Mon Sep 17 00:00:00 2001 From: pachacamac <pachacamac@users.noreply.github.com> Date: Wed, 25 Jun 2014 18:07:23 +0200 Subject: [PATCH 142/340] [Soundgasm] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/soundgasm.py | 34 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 youtube_dl/extractor/soundgasm.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3160df1e..02143de9e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -255,6 +255,7 @@ from .soundcloud import ( SoundcloudUserIE, SoundcloudPlaylistIE ) +from .soundgasm import SoundgasmIE from .southparkstudios import ( SouthParkStudiosIE, SouthparkDeIE, diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py new file mode 100644 index 000000000..b9d4c6a56 --- /dev/null +++ b/youtube_dl/extractor/soundgasm.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class SoundgasmIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)' + _TEST = { + 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'ytdl_Piano-sample', + 'description': 'Royalty Free Sample Music' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + audio_title = mobj.group('user') + '_' + mobj.group('title') + webpage = self._download_webpage(url, '') + audio_url = self._html_search_regex(r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL') + audio_id = re.split('\/|\.', audio_url)[-2] + description = self._html_search_regex(r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description', fatal=False, flags=re.DOTALL) + + return { + 'id': audio_id, + 'url': audio_url, + 'title': audio_title, + 'description': description + } From 637b6af80fa6086e7d264f422bffde7694478290 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 25 Jun 2014 21:24:01 +0200 Subject: [PATCH 143/340] release 2014.06.25 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 77f6083d5..aa50aac98 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.24.1' +__version__ = '2014.06.25' From ba7aa464de0a0a6596eb5334b7e2491a03dfbc92 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Wed, 25 Jun 2014 23:47:38 +0200 Subject: [PATCH 144/340] [soundgasm] PEP8 and add a display_id (#3155) --- youtube_dl/extractor/soundgasm.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py index b9d4c6a56..a4f8ce6c3 100644 --- a/youtube_dl/extractor/soundgasm.py +++ b/youtube_dl/extractor/soundgasm.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor + class SoundgasmIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)' _TEST = { @@ -20,14 +21,19 @@ class SoundgasmIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('title') audio_title = mobj.group('user') + '_' + mobj.group('title') - webpage = self._download_webpage(url, '') - audio_url = self._html_search_regex(r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL') + webpage = self._download_webpage(url, display_id) + audio_url = self._html_search_regex( + r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL') audio_id = re.split('\/|\.', audio_url)[-2] - description = self._html_search_regex(r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description', fatal=False, flags=re.DOTALL) + description = self._html_search_regex( + r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description', + fatal=False) return { 'id': audio_id, + 'display_id': display_id, 'url': audio_url, 'title': audio_title, 'description': description From d410fee91d90ebd030fe69148befc0358a07ed2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= <Pulpan3@gmail.com> Date: Thu, 26 Jun 2014 07:35:47 +0200 Subject: [PATCH 145/340] [VideoTt] fix ValueError (#3161) --- youtube_dl/extractor/videott.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/videott.py b/youtube_dl/extractor/videott.py index b5034b02f..a647807d0 100644 --- a/youtube_dl/extractor/videott.py +++ b/youtube_dl/extractor/videott.py @@ -4,7 +4,10 @@ import re import base64 from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + unified_strdate, + int_or_none, +) class VideoTtIE(InfoExtractor): @@ -50,9 +53,9 @@ class VideoTtIE(InfoExtractor): 'thumbnail': settings['config']['thumbnail'], 'upload_date': unified_strdate(video['added']), 'uploader': video['owner'], - 'view_count': int(video['view_count']), - 'comment_count': int(video['comment_count']), - 'like_count': int(video['liked']), - 'dislike_count': int(video['disliked']), + 'view_count': int_or_none(video['view_count']), + 'comment_count': None if video.get('comment_count') == '--' else int_or_none(video['comment_count']), + 'like_count': int_or_none(video['liked']), + 'dislike_count': int_or_none(video['disliked']), 'formats': formats, } \ No newline at end of file From c7df67edbd84606051c84b10339302813f5b0314 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jun 2014 20:00:47 +0700 Subject: [PATCH 146/340] [teachertube] Improve extraction --- youtube_dl/extractor/teachertube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index b3cb6bd76..c9a3fe571 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -66,6 +66,7 @@ class TeacherTubeIE(InfoExtractor): media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage) media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage)) + media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage)) formats = [ { @@ -79,7 +80,7 @@ class TeacherTubeIE(InfoExtractor): return { 'id': video_id, 'title': title, - 'thumbnail': self._html_search_regex(r'var\s+thumbUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail'), + 'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'), 'formats': formats, 'description': description, } From f5172a308418a54d077e19495a5c560f2bd644e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 26 Jun 2014 20:01:59 +0700 Subject: [PATCH 147/340] [teachertube] Add support for new URL formats --- youtube_dl/extractor/teachertube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index c9a3fe571..d9868d569 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor): IE_NAME = 'teachertube' IE_DESC = 'teachertube.com videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/|audio/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', From 78338f71ca2d96e4bf507c438fbb2751742989b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 26 Jun 2014 16:34:36 +0200 Subject: [PATCH 148/340] [livestream:original] Add support for folder urls (closes #2631) The webpage only contains shortened links for the videos, since the server doesn't support HEAD requests, we use an specific extractor for them. --- test/test_playlists.py | 9 ++++++ youtube_dl/extractor/__init__.py | 6 +++- youtube_dl/extractor/common.py | 3 ++ youtube_dl/extractor/livestream.py | 52 +++++++++++++++++++++++++++--- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 42051fe2a..71dac1b02 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -30,6 +30,7 @@ from youtube_dl.extractor import ( SoundcloudPlaylistIE, TeacherTubeClassroomIE, LivestreamIE, + LivestreamOriginalIE, NHLVideocenterIE, BambuserChannelIE, BandcampAlbumIE, @@ -155,6 +156,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertTrue(len(result['entries']) >= 4) + def test_livestreamoriginal_folder(self): + dl = FakeYDL() + ie = LivestreamOriginalIE(dl) + result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') + self.assertTrue(len(result['entries']) >= 28) + def test_nhl_videocenter(self): dl = FakeYDL() ie = NHLVideocenterIE(dl) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 02143de9e..a1cdcf0f7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -147,7 +147,11 @@ from .ku6 import Ku6IE from .la7 import LA7IE from .lifenews import LifeNewsIE from .liveleak import LiveLeakIE -from .livestream import LivestreamIE, LivestreamOriginalIE +from .livestream import ( + LivestreamIE, + LivestreamOriginalIE, + LivestreamShortenerIE, +) from .lynda import ( LyndaIE, LyndaCourseIE diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 49e75405e..e4e4feef9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -459,6 +459,9 @@ class InfoExtractor(object): if secure: regexes = self._og_regexes('video:secure_url') + regexes return self._html_search_regex(regexes, html, name, **kargs) + def _og_search_url(self, html, **kargs): + return self._og_search_property('url', html, **kargs) + def _html_search_meta(self, name, html, display_name=None, fatal=False): if display_name is None: display_name = name diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 5c71f4f09..2c100d424 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -9,6 +9,7 @@ from ..utils import ( compat_urlparse, xpath_with_ns, compat_str, + orderedSet, ) @@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor): # The original version of Livestream uses a different system class LivestreamOriginalIE(InfoExtractor): IE_NAME = 'livestream:original' - _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' + _VALID_URL = r'''(?x)https?://www\.livestream\.com/ + (?P<user>[^/]+)/(?P<type>video|folder) + (?:\?.*?Id=|/)(?P<id>.*?)(&|$) + ''' _TEST = { 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', 'info_dict': { @@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor): }, } - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - user = mobj.group('user') + def _extract_video(self, user, video_id): api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) info = self._download_xml(api_url, video_id) @@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor): 'ext': 'flv', 'thumbnail': thumbnail_url, } + + def _extract_folder(self, url, folder_id): + webpage = self._download_webpage(url, folder_id) + urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage)) + + return { + '_type': 'playlist', + 'id': folder_id, + 'entries': [{ + '_type': 'url', + 'url': video_url, + } for video_url in urls], + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + id = mobj.group('id') + user = mobj.group('user') + url_type = mobj.group('type') + if url_type == 'folder': + return self._extract_folder(url, id) + else: + return self._extract_video(user, id) + + +# The server doesn't support HEAD request, the generic extractor can't detect +# the redirection +class LivestreamShortenerIE(InfoExtractor): + IE_NAME = 'livestream:shortener' + IE_DESC = False # Do not list + _VALID_URL = r'https?://livestre\.am/(?P<id>.+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + id = mobj.group('id') + webpage = self._download_webpage(url, id) + + return { + '_type': 'url', + 'url': self._og_search_url(webpage), + } From 4242001863eef690416f0ab2b84b97cfc569847b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 26 Jun 2014 16:44:01 +0200 Subject: [PATCH 149/340] release 2014.06.26 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index aa50aac98..ab076489f 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.25' +__version__ = '2014.06.26' From 331ae266ff4486d8ac0ba2fb116f4abf33924054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 26 Jun 2014 20:30:44 +0200 Subject: [PATCH 150/340] [npo] Add extractor (closes #3145) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/npo.py | 62 ++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 youtube_dl/extractor/npo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a1cdcf0f7..3855e9b10 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -201,6 +201,7 @@ from .normalboots import NormalbootsIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE +from .npo import NPOIE from .nrk import ( NRKIE, NRKTVIE, diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py new file mode 100644 index 000000000..fbcbe1f40 --- /dev/null +++ b/youtube_dl/extractor/npo.py @@ -0,0 +1,62 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, +) + + +class NPOIE(InfoExtractor): + IE_NAME = 'npo.nl' + _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)' + + _TEST = { + 'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719', + 'md5': '4b3f9c429157ec4775f2c9cb7b911016', + 'info_dict': { + 'id': 'VPWON_1220719', + 'ext': 'mp4', + 'title': 'Nieuwsuur', + 'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.', + 'upload_date': '20140622', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + metadata = self._download_json( + 'http://e.omroep.nl/metadata/aflevering/%s' % video_id, + video_id, + # We have to remove the javascript callback + transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j) + ) + token_page = self._download_webpage( + 'http://ida.omroep.nl/npoplayer/i.js', + video_id, + note='Downloading token' + ) + token = self._search_regex(r'npoplayer.token = "(.+?)"', token_page, 'token') + streams_info = self._download_json( + 'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token), + video_id + ) + + stream_info = self._download_json( + streams_info['streams'][0] + '&type=json', + video_id, + 'Downloading stream info' + ) + + return { + 'id': video_id, + 'title': metadata['titel'], + 'ext': 'mp4', + 'url': stream_info['url'], + 'description': metadata['info'], + 'thumbnail': metadata['images'][-1]['url'], + 'upload_date': unified_strdate(metadata['gidsdatum']), + } From f2b8db57ebb15fa2edc9c7810436618a0725d451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 27 Jun 2014 20:53:59 +0700 Subject: [PATCH 151/340] [drtv] Add extractor for DR TV (Closes #3126) --- youtube_dl/extractor/drtv.py | 91 ++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 youtube_dl/extractor/drtv.py diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py new file mode 100644 index 000000000..60f073ff0 --- /dev/null +++ b/youtube_dl/extractor/drtv.py @@ -0,0 +1,91 @@ +from __future__ import unicode_literals + +import re + +from .subtitles import SubtitlesInfoExtractor +from .common import ExtractorError +from..utils import parse_iso8601 + + +class DRTVIE(SubtitlesInfoExtractor): + _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)' + + _TEST = { + 'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8', + 'md5': '4a7e1dd65cdb2643500a3f753c942f25', + 'info_dict': { + 'id': 'partiets-mand-7-8', + 'ext': 'mp4', + 'title': 'Partiets mand (7:8)', + 'description': 'md5:a684b90a8f9336cd4aab94b7647d7862', + 'timestamp': 1403047940, + 'upload_date': '20140617', + 'duration': 1299.040, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + programcard = self._download_json( + 'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON') + + data = programcard['Data'][0] + + title = data['Title'] + description = data['Description'] + timestamp = parse_iso8601(data['CreatedTime'][:-5]) + + thumbnail = None + duration = None + + restricted_to_denmark = False + + formats = [] + subtitles = {} + + for asset in data['Assets']: + if asset['Kind'] == 'Image': + thumbnail = asset['Uri'] + elif asset['Kind'] == 'VideoResource': + duration = asset['DurationInMilliseconds'] / 1000.0 + restricted_to_denmark = asset['RestrictedToDenmark'] + for link in asset['Links']: + target = link['Target'] + uri = link['Uri'] + formats.append({ + 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri, + 'format_id': target, + 'ext': link['FileFormat'], + 'preference': -1 if target == 'HDS' else -2, + }) + subtitles_list = asset.get('SubtitlesList') + if isinstance(subtitles_list, list): + LANGS = { + 'Danish': 'dk', + } + for subs in subtitles_list: + lang = subs['Language'] + subtitles[LANGS.get(lang, lang)] = subs['Uri'] + + if not formats and restricted_to_denmark: + raise ExtractorError( + 'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True) + + self._sort_formats(formats) + + if self._downloader.params.get('listsubtitles', False): + self._list_available_subtitles(video_id, subtitles) + return + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + 'subtitles': self.extract_subtitles(video_id, subtitles), + } From a69969ee058d3bcc71c90d547fdece0fdd03f7c0 Mon Sep 17 00:00:00 2001 From: Thomas Jost <schnouki@schnouki.net> Date: Fri, 27 Jun 2014 18:03:20 +0200 Subject: [PATCH 152/340] [Motherless] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/motherless.py | 93 ++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 youtube_dl/extractor/motherless.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3855e9b10..4ff2d3798 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -169,6 +169,7 @@ from .mpora import MporaIE from .mofosex import MofosexIE from .mooshare import MooshareIE from .morningstar import MorningstarIE +from .motherless import MotherlessIE from .motorsport import MotorsportIE from .moviezine import MoviezineIE from .movshare import MovShareIE diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py new file mode 100644 index 000000000..782651612 --- /dev/null +++ b/youtube_dl/extractor/motherless.py @@ -0,0 +1,93 @@ +from __future__ import unicode_literals + +import datetime +import re + +from .common import InfoExtractor +from ..utils import str_to_int + + +class MotherlessIE(InfoExtractor): + """Information Extractor for Motherless""" + _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)' + _TESTS = [ + { + 'url': 'http://motherless.com/AC3FFE1', + 'md5': '5527fef81d2e529215dad3c2d744a7d9', + 'info_dict': { + 'id': 'AC3FFE1', + 'ext': 'flv', + 'title': 'Fucked in the ass while playing PS3', + 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'], + 'upload_date': '20100913', + 'uploader_id': 'famouslyfuckedup', + 'thumbnail': 'http://thumbs.motherlessmedia.com/thumbs/AC3FFE1.jpg', + 'age_limit': 18, + } + }, + { + 'url': 'http://motherless.com/532291B', + 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131', + 'info_dict': { + 'id': '532291B', + 'ext': 'mp4', + 'title': 'Amazing girl playing the omegle game, PERFECT!', + 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'], + 'upload_date': '20140622', + 'uploader_id': 'Sulivana7x', + 'thumbnail': 'http://thumbs.motherlessmedia.com/thumbs/532291B.jpg', + 'age_limit': 18, + } + } + ] + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<title>(?P<title>.+?) - MOTHERLESS.COM', webpage, 'title') + video_url = self._search_regex(r"__fileurl = '(?P[^']+)'", webpage, 'video_url') + thumbnail = self._og_search_thumbnail(webpage) + age_limit = self._rta_search(webpage) # Hint: it's 18 ;) + view_count = str_to_int(self._html_search_regex(r'Views(.+?)', webpage, + 'view_count', flags=re.DOTALL)) + + like_count = str_to_int(self._html_search_regex(r'Favorited(.+?)', webpage, + 'like_count', flags=re.DOTALL)) + comment_count = webpage.count('class="media-comment-contents"') + uploader_id = self._html_search_regex(r'
.*?]*>(.+?)', + webpage, 'uploader_id', flags=re.DOTALL) + + categories = self._html_search_meta('keywords', webpage) + if categories is not None: + categories = [cat.strip() for cat in categories.split(',')] + + upload_date = self._html_search_regex(r'Uploaded(.+?)', webpage, + 'upload_date', flags=re.DOTALL) + mobj = re.search(r'(\d+) days? ago', upload_date, re.I) + if mobj is not None: + upload_date = datetime.datetime.now() - datetime.timedelta(days=int(mobj.group(1))) + else: + mobj = re.search(r'(\w+) (\d+)\w* (\d+)', upload_date, re.I) + if mobj is not None: + upload_date = datetime.datetime.strptime('%s %s %s' % mobj.groups(), '%b %d %Y').date() + else: + upload_date = None + if upload_date is not None: + upload_date = upload_date.strftime('%Y%m%d') + + return { + 'id': video_id, + 'title': title, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'thumbnail': thumbnail, + 'categories': categories, + 'view_count': view_count, + 'like_count': like_count, + 'comment_count': comment_count, + 'age_limit': age_limit, + 'url': video_url, + } From 458ade6361dfd57c50f2dbc41ced37c2d42c8cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sat, 28 Jun 2014 10:22:53 +0200 Subject: [PATCH 153/340] [ArteTVFuture] fix empty formats list --- youtube_dl/extractor/arte.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b528a9ec5..b42102f3d 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -111,7 +111,7 @@ class ArteTVPlus7IE(InfoExtractor): if not formats: # Some videos are only available in the 'Originalversion' # they aren't tagged as being in French or German - if all(f['versionCode'] == 'VO' for f in all_formats): + if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): formats = all_formats else: raise ExtractorError(u'The formats list is empty') @@ -189,9 +189,10 @@ class ArteTVFutureIE(ArteTVPlus7IE): _TEST = { 'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', 'info_dict': { - 'id': '050940-003', + 'id': '5201', 'ext': 'mp4', 'title': 'Les champignons au secours de la planète', + 'upload_date': '20131101', }, } From 803540e8119257cb2fabfdcd6632945385475448 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 28 Jun 2014 17:36:13 +0700 Subject: [PATCH 154/340] [drtv] Add missing extractor import --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/drtv.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3855e9b10..a1aaeb4b7 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -63,6 +63,7 @@ from .dailymotion import ( from .daum import DaumIE from .dotsub import DotsubIE from .dreisat import DreiSatIE +from .drtv import DRTVIE from .defense import DefenseGouvFrIE from .discovery import DiscoveryIE from .divxstage import DivxStageIE diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index 60f073ff0..cdccfd376 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -4,7 +4,7 @@ import re from .subtitles import SubtitlesInfoExtractor from .common import ExtractorError -from..utils import parse_iso8601 +from ..utils import parse_iso8601 class DRTVIE(SubtitlesInfoExtractor): From 78ff59d052ba1f6763d1e2e7f3d1f72765325d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sat, 28 Jun 2014 20:02:02 +0200 Subject: [PATCH 155/340] [Motherless] simplify --- youtube_dl/extractor/motherless.py | 58 ++++++++++++++---------------- youtube_dl/utils.py | 3 ++ 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py index 782651612..6229b2173 100644 --- a/youtube_dl/extractor/motherless.py +++ b/youtube_dl/extractor/motherless.py @@ -4,11 +4,13 @@ import datetime import re from .common import InfoExtractor -from ..utils import str_to_int +from ..utils import ( + int_or_none, + unified_strdate, +) class MotherlessIE(InfoExtractor): - """Information Extractor for Motherless""" _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P[A-Z0-9]+)' _TESTS = [ { @@ -21,7 +23,7 @@ class MotherlessIE(InfoExtractor): 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'], 'upload_date': '20100913', 'uploader_id': 'famouslyfuckedup', - 'thumbnail': 'http://thumbs.motherlessmedia.com/thumbs/AC3FFE1.jpg', + 'thumbnail': 're:http://.*\.jpg', 'age_limit': 18, } }, @@ -35,7 +37,7 @@ class MotherlessIE(InfoExtractor): 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'], 'upload_date': '20140622', 'uploader_id': 'Sulivana7x', - 'thumbnail': 'http://thumbs.motherlessmedia.com/thumbs/532291B.jpg', + 'thumbnail': 're:http://.*\.jpg', 'age_limit': 18, } } @@ -47,46 +49,38 @@ class MotherlessIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'(?P<title>.+?) - MOTHERLESS.COM', webpage, 'title') - video_url = self._search_regex(r"__fileurl = '(?P[^']+)'", webpage, 'video_url') - thumbnail = self._og_search_thumbnail(webpage) - age_limit = self._rta_search(webpage) # Hint: it's 18 ;) - view_count = str_to_int(self._html_search_regex(r'Views(.+?)', webpage, - 'view_count', flags=re.DOTALL)) + title = self._html_search_regex(r'id="view-upload-title">\s+([^<]+)<', webpage, 'title') + + video_url = self._html_search_regex(r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url') + age_limit = self._rta_search(webpage) + + view_count = self._html_search_regex(r'Views\s+([^<]+)<', webpage, 'view_count') + + upload_date = self._html_search_regex(r'Uploaded\s+([^<]+)<', webpage, 'upload_date') + if 'Ago' in upload_date: + days = int(re.search(r'([0-9]+)', upload_date).group(1)) + upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d') + else: + upload_date = unified_strdate(upload_date) + + like_count = self._html_search_regex(r'Favorited\s+([^<]+)<', webpage, 'like_count') - like_count = str_to_int(self._html_search_regex(r'Favorited(.+?)', webpage, - 'like_count', flags=re.DOTALL)) comment_count = webpage.count('class="media-comment-contents"') - uploader_id = self._html_search_regex(r'
.*?]*>(.+?)', - webpage, 'uploader_id', flags=re.DOTALL) + uploader_id = self._html_search_regex(r'"thumb-member-username">\s+Uploaded(.+?)', webpage, - 'upload_date', flags=re.DOTALL) - mobj = re.search(r'(\d+) days? ago', upload_date, re.I) - if mobj is not None: - upload_date = datetime.datetime.now() - datetime.timedelta(days=int(mobj.group(1))) - else: - mobj = re.search(r'(\w+) (\d+)\w* (\d+)', upload_date, re.I) - if mobj is not None: - upload_date = datetime.datetime.strptime('%s %s %s' % mobj.groups(), '%b %d %Y').date() - else: - upload_date = None - if upload_date is not None: - upload_date = upload_date.strftime('%Y%m%d') - return { 'id': video_id, 'title': title, 'upload_date': upload_date, 'uploader_id': uploader_id, - 'thumbnail': thumbnail, + 'thumbnail': self._og_search_thumbnail(webpage), 'categories': categories, - 'view_count': view_count, - 'like_count': like_count, + 'view_count': int_or_none(view_count.replace(',', '')), + 'like_count': int_or_none(like_count.replace(',', '')), 'comment_count': comment_count, 'age_limit': age_limit, 'url': video_url, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b97e62ae9..09312e81a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -816,6 +816,9 @@ def unified_strdate(date_str): '%d %b %Y', '%B %d %Y', '%b %d %Y', + '%b %dst %Y %I:%M%p', + '%b %dnd %Y %I:%M%p', + '%b %dth %Y %I:%M%p', '%Y-%m-%d', '%d.%m.%Y', '%d/%m/%Y', From 01ba178097d08b1337f9343029ee4eedb1b512a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 04:51:47 +0700 Subject: [PATCH 156/340] [vk] Update test --- youtube_dl/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index fb082f364..66fe1dd3e 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -27,7 +27,7 @@ class VKIE(InfoExtractor): 'id': '162222515', 'ext': 'flv', 'title': 'ProtivoGunz - Хуёвая песня', - 'uploader': 'Noize MC', + 'uploader': 're:Noize MC.*', 'duration': 195, }, }, From ee8dda41aebf64ad5250347dbea38c901fd054c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sun, 29 Jun 2014 08:21:23 +0200 Subject: [PATCH 157/340] [Toypics] support https urls --- youtube_dl/extractor/toypics.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/toypics.py b/youtube_dl/extractor/toypics.py index 34008afc6..0f389bd93 100644 --- a/youtube_dl/extractor/toypics.py +++ b/youtube_dl/extractor/toypics.py @@ -1,10 +1,13 @@ +# -*- coding:utf-8 -*- +from __future__ import unicode_literals + from .common import InfoExtractor import re class ToypicsIE(InfoExtractor): IE_DESC = 'Toypics user profile' - _VALID_URL = r'http://videos\.toypics\.net/view/(?P[0-9]+)/.*' + _VALID_URL = r'https?://videos\.toypics\.net/view/(?P[0-9]+)/.*' _TEST = { 'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', 'md5': '16e806ad6d6f58079d210fe30985e08b', @@ -61,7 +64,7 @@ class ToypicsUserIE(InfoExtractor): note='Downloading page %d/%d' % (n, page_count)) urls.extend( re.findall( - r'

\n\s*', + r'

\s+', lpage)) return { From 62f1f9507f3c2406c915745d86d62a7b820c21fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sun, 29 Jun 2014 09:08:46 +0200 Subject: [PATCH 158/340] [Tumblr] fix test + add description --- youtube_dl/extractor/tumblr.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 544369068..3971ad6df 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -10,14 +10,27 @@ from ..utils import ( class TumblrIE(InfoExtractor): _VALID_URL = r'http://(?P.*?)\.tumblr\.com/((post)|(video))/(?P\d*)($|/)' - _TEST = { + _TESTS = [{ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', - 'file': '54196191430.mp4', 'md5': '479bb068e5b16462f5176a6828829767', 'info_dict': { - "title": "tatiana maslany news" + 'id': '54196191430', + 'ext': 'mp4', + 'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...', + 'description': 'md5:dfac39636969fe6bf1caa2d50405f069', + 'thumbnail': 're:http://.*\.jpg', } - } + }, { + 'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all', + 'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359', + 'info_dict': { + 'id': '90208453769', + 'ext': 'mp4', + 'title': '5SOS STRUM ;)', + 'description': 'md5:dba62ac8639482759c8eb10ce474586a', + 'thumbnail': 're:http://.*\.jpg', + } + }] def _real_extract(self, url): m_url = re.match(self._VALID_URL, url) @@ -48,6 +61,7 @@ class TumblrIE(InfoExtractor): return [{'id': video_id, 'url': video_url, 'title': video_title, + 'description': self._html_search_meta('description', webpage), 'thumbnail': video_thumbnail, 'ext': ext }] From 9b27e6c3b49175e1e9b8f809607e7fbdee4cb9ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sun, 29 Jun 2014 09:32:53 +0200 Subject: [PATCH 159/340] [Tumblr] fix encoding (PEP0263) --- youtube_dl/extractor/tumblr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py index 3971ad6df..2882c1809 100644 --- a/youtube_dl/extractor/tumblr.py +++ b/youtube_dl/extractor/tumblr.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals import re From 31a196d7f55d7d7676c08553474a5ec122178177 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sun, 29 Jun 2014 13:45:10 +0200 Subject: [PATCH 160/340] [TeacherTube] add user + collection, removed classrooms --- test/test_playlists.py | 10 +++++----- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/teachertube.py | 28 +++++++++++++++++++--------- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 71dac1b02..994b1d4b0 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -28,7 +28,7 @@ from youtube_dl.extractor import ( SoundcloudSetIE, SoundcloudUserIE, SoundcloudPlaylistIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, LivestreamIE, LivestreamOriginalIE, NHLVideocenterIE, @@ -379,13 +379,13 @@ class TestPlaylists(unittest.TestCase): result['title'], 'Brace Yourself - Today\'s Weirdest News') self.assertTrue(len(result['entries']) >= 10) - def test_TeacherTubeClassroom(self): + def test_TeacherTubeUser(self): dl = FakeYDL() - ie = TeacherTubeClassroomIE(dl) - result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2') + ie = TeacherTubeUserIE(dl) + result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'rbhagwati2') - self.assertTrue(len(result['entries']) >= 20) + self.assertTrue(len(result['entries']) >= 179) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f910d1a26..24b046173 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -282,7 +282,7 @@ from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .teachertube import ( TeacherTubeIE, - TeacherTubeClassroomIE, + TeacherTubeUserIE, ) from .teachingchannel import TeachingChannelIE from .teamcoco import TeamcocoIE diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index d9868d569..73b4a3634 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -86,22 +86,32 @@ class TeacherTubeIE(InfoExtractor): } -class TeacherTubeClassroomIE(InfoExtractor): - IE_NAME = 'teachertube:classroom' - IE_DESC = 'teachertube.com online classrooms' +class TeacherTubeUserIE(InfoExtractor): + IE_NAME = 'teachertube:user:collection' + IE_DESC = 'teachertube.com user and collection videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P[0-9a-zA-Z]+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P[0-9a-zA-Z]+)/?' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('user') - rss = self._download_xml( - 'http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, - user_id, 'Downloading classroom RSS') + urls = [] + webpage = self._download_webpage(url, user_id) + urls.extend(re.findall( + r'"sidebar_thumb_time">[0-9:]+

\s+', + webpage)) + + pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] + for p in pages: + more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) + webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) + urls.extend(re.findall( + r'"sidebar_thumb_time">[0-9:]+
\s+', + webpage)) entries = [] - for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'): - entries.append(self.url_result(url.attrib['url'], 'TeacherTube')) + for url in urls: + entries.append(self.url_result(url, 'TeacherTube')) return self.playlist_result(entries, user_id) From 57bdc730e264ffbc93be55fe1541ca40fce48c49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 19:33:39 +0700 Subject: [PATCH 161/340] [vk] Add support for more URL formats (#3172) --- youtube_dl/extractor/vk.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 66fe1dd3e..c48528ad9 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -16,7 +16,7 @@ from ..utils import ( class VKIE(InfoExtractor): IE_NAME = 'vk.com' - _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:videos.*?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' + _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:.+?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' _NETRC_MACHINE = 'vk' _TESTS = [ @@ -62,11 +62,35 @@ class VKIE(InfoExtractor): 'id': '164049491', 'ext': 'mp4', 'uploader': 'Триллеры', - 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0', + 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 'duration': 8352, }, 'skip': 'Requires vk account credentials', }, + { + 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', + 'md5': 'd82c22e449f036282d1d3f7f4d276869', + 'info_dict': { + 'id': '166094326', + 'ext': 'mp4', + 'uploader': 'Киномания - лучшее из мира кино', + 'title': 'Запах женщины (1992)', + 'duration': 9392, + }, + 'skip': 'Requires vk account credentials', + }, + { + 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', + 'md5': '4d7a5ef8cf114dfa09577e57b2993202', + 'info_dict': { + 'id': '168067957', + 'ext': 'mp4', + 'uploader': 'Киномания - лучшее из мира кино', + 'title': ' ', + 'duration': 7291, + }, + 'skip': 'Requires vk account credentials', + }, ] def _login(self): From a8a98e43f214e6fe5d322dca3534a8ec926890b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 19:51:00 +0700 Subject: [PATCH 162/340] [vk] Add support for mobile URLs --- youtube_dl/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index c48528ad9..4afb05923 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -16,7 +16,7 @@ from ..utils import ( class VKIE(InfoExtractor): IE_NAME = 'vk.com' - _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:.+?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' + _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P-?\d+).*?\bid=(?P\d+)|(?:.+?\?.*?z=)?video(?P.*?)(?:\?|%2F|$))' _NETRC_MACHINE = 'vk' _TESTS = [ From 36fbc6887f22603444ec70ca2d690be0f3b4f5f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 20:06:47 +0700 Subject: [PATCH 163/340] [ivi] Add support for embedded URLs --- youtube_dl/extractor/ivi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py index 528be1524..4027deb70 100644 --- a/youtube_dl/extractor/ivi.py +++ b/youtube_dl/extractor/ivi.py @@ -14,7 +14,7 @@ from ..utils import ( class IviIE(InfoExtractor): IE_DESC = 'ivi.ru' IE_NAME = 'ivi' - _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P[^/]+))?/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P\d+)' _TESTS = [ # Single movie From 849086a1ae153e0dbc5047cbcf8324938d7b7036 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 20:07:59 +0700 Subject: [PATCH 164/340] [vk] Better support for embeds --- youtube_dl/extractor/vk.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 4afb05923..6c7db7a6f 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -91,6 +91,17 @@ class VKIE(InfoExtractor): }, 'skip': 'Requires vk account credentials', }, + { + 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540', + 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', + 'note': 'ivi.ru embed', + 'info_dict': { + 'id': '60690', + 'ext': 'mp4', + 'title': 'Книга Илая', + 'duration': 6771, + }, + }, ] def _login(self): @@ -134,6 +145,16 @@ class VKIE(InfoExtractor): if m_yt is not None: self.to_screen('Youtube video detected') return self.url_result(m_yt.group(1), 'Youtube') + + m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) + if m_opts: + m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) + if m_opts_url: + opts_url = m_opts_url.group(1) + if opts_url.startswith('//'): + opts_url = 'http:' + opts_url + return self.url_result(opts_url) + data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') data = json.loads(data_json) From 0364fa8b65a6c6742454ec5f3a858e06dc1527f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Jun 2014 20:18:23 +0700 Subject: [PATCH 165/340] [generic] Add support for ivi.ru embedded player --- youtube_dl/extractor/generic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 9dd03aba4..869efb215 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -620,6 +620,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'VK') + # Look for embedded ivi player + mobj = re.search(r']+?src=(["\'])(?Phttps?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Ivi') + # Look for embedded Huffington Post player mobj = re.search( r']+?src=(["\'])(?Phttps?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) From 41b610acab43c03f71fb64ae55c0912352143ae7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sun, 29 Jun 2014 16:43:31 +0200 Subject: [PATCH 166/340] [GooglePlus] fix video title extraction --- youtube_dl/extractor/googleplus.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index cc29a7e5d..07d994b44 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor): # Extract title # Get the first line for title - video_title = self._html_search_regex(r' Date: Sun, 29 Jun 2014 20:33:46 +0200 Subject: [PATCH 167/340] [teachertube:user] fix regex --- youtube_dl/extractor/teachertube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 73b4a3634..1a438e1e4 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -99,7 +99,7 @@ class TeacherTubeUserIE(InfoExtractor): urls = [] webpage = self._download_webpage(url, user_id) urls.extend(re.findall( - r'"sidebar_thumb_time">[0-9:]+\s+', + r'"sidebar_thumb_time">[0-9:]+\s+', webpage)) pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] @@ -107,7 +107,7 @@ class TeacherTubeUserIE(InfoExtractor): more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) urls.extend(re.findall( - r'"sidebar_thumb_time">[0-9:]+\s+', + r'"sidebar_thumb_time">[0-9:]+\s+', webpage)) entries = [] From d518d06efd143072a64bcd5dc51e16c89bda06c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 30 Jun 2014 03:16:31 +0700 Subject: [PATCH 168/340] [vk] Skip georestricted ivi embed test --- youtube_dl/extractor/vk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 6c7db7a6f..918bd1098 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -101,6 +101,7 @@ class VKIE(InfoExtractor): 'title': 'Книга Илая', 'duration': 6771, }, + 'skip': 'Only works from Russia', }, ] From 7807ee664dab04673b18722c4313abe09ee6b1be Mon Sep 17 00:00:00 2001 From: pulpe Date: Tue, 1 Jul 2014 09:59:57 +0200 Subject: [PATCH 169/340] [wdr] fix test --- youtube_dl/extractor/wdr.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index feeb44b45..f741ba540 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals import re @@ -54,14 +55,14 @@ class WDRIE(InfoExtractor): }, }, { - 'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', - 'md5': 'cfff440d4ee64114083ac44676df5d15', + 'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html', + 'md5': '24e83813e832badb0a8d7d1ef9ef0691', 'info_dict': { - 'id': 'mdb-363068', + 'id': 'mdb-463528', 'ext': 'mp3', - 'title': 'Grenzenlos lecker - Baklava', + 'title': 'Süpersong: Soul Bossa Nova', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', - 'upload_date': '20140311', + 'upload_date': '20140630', }, }, ] @@ -127,9 +128,10 @@ class WDRMobileIE(InfoExtractor): 'info_dict': { 'title': '4283021', 'id': '421735', + 'ext': 'mp4', 'age_limit': 0, }, - '_skip': 'Will be depublicized shortly' + 'skip': 'Problems with loading data.' } def _real_extract(self, url): @@ -139,6 +141,7 @@ class WDRMobileIE(InfoExtractor): 'title': mobj.group('title'), 'age_limit': int(mobj.group('age_limit')), 'url': url, + 'ext': determine_ext(url), 'user_agent': 'mobile', } From 29f6ed78e87946979ab6472b118a4da7cf7ef0c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Tue, 1 Jul 2014 10:35:49 +0200 Subject: [PATCH 170/340] [tagesschau] replace 404 test --- youtube_dl/extractor/tagesschau.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 36331529e..25b9864ad 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -20,13 +20,13 @@ class TagesschauIE(InfoExtractor): 'thumbnail': 're:^http:.*\.jpg$', }, }, { - 'url': 'http://www.tagesschau.de/multimedia/video/video-196.html', - 'md5': '8aaa8bf3ae1ca2652309718c03019128', + 'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html', + 'md5': '66652566900963a3f962333579eeffcf', 'info_dict': { - 'id': '196', + 'id': '5964', 'ext': 'mp4', - 'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt', - 'description': 'md5:f22e4af75821d174fa6c977349682691', + 'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland', + 'description': 'md5:07bfc78c48eec3145ed4805299a1900a', 'thumbnail': 're:http://.*\.jpg', }, }] From c67f584eb3cb3fe9ccb6ace6b6ed96594ca7799d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 1 Jul 2014 19:24:18 +0700 Subject: [PATCH 171/340] [rai] Skip test --- youtube_dl/extractor/rai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py index cb4305349..ba3dd707f 100644 --- a/youtube_dl/extractor/rai.py +++ b/youtube_dl/extractor/rai.py @@ -35,7 +35,8 @@ class RaiIE(SubtitlesInfoExtractor): 'description': '', 'upload_date': '20140612', 'duration': 1758, - } + }, + 'skip': 'Error 404', }, { 'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', From c4808c6009aef29c908139ee529f4938b7df8190 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Jul 2014 15:48:18 +0200 Subject: [PATCH 172/340] [youtube_truncated_url] Add support for truncated watch URLs with annotations (#3178) --- youtube_dl/extractor/youtube.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6bdea1c44..ec3024cbd 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1825,10 +1825,18 @@ class YoutubeTruncatedURLIE(InfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list _VALID_URL = r'''(?x) - (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$| + (?:https?://)?[^/]+/watch\?(?: + feature=[a-z_]+| + annotation_id=annotation_[^&]+ + )?$| (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ ''' + _TESTS = [{ + 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', + 'only_matching': True, + }] + def _real_extract(self, url): raise ExtractorError( u'Did you forget to quote the URL? Remember that & is a meta ' From dc2fc736911b5b8d769becd9227976e5caf267dc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 1 Jul 2014 15:49:34 +0200 Subject: [PATCH 173/340] [youtube:truncated_url] Move test to extractor --- test/test_all_urls.py | 3 --- youtube_dl/extractor/youtube.py | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 4b56137ce..2bc81f020 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase): def test_youtube_show_matching(self): self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) - def test_youtube_truncated(self): - self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) - def test_youtube_search_matching(self): self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index ec3024cbd..bf0fbc924 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1835,6 +1835,9 @@ class YoutubeTruncatedURLIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', 'only_matching': True, + }, { + 'url': 'http://www.youtube.com/watch?', + 'only_matching': True, }] def _real_extract(self, url): From 2fd466fcfcc9895230f806379d149389236acde2 Mon Sep 17 00:00:00 2001 From: hakatashi Date: Wed, 2 Jul 2014 02:32:54 +0900 Subject: [PATCH 174/340] [niconico] Download without authentication --- youtube_dl/extractor/niconico.py | 39 +++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 517a72561..ba7464cb8 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -39,15 +39,18 @@ class NiconicoIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' _NETRC_MACHINE = 'niconico' + # Determine whether the downloader uses authentication to download video + _AUTHENTICATE = False def _real_initialize(self): - self._login() + if self._downloader.params.get('username', None) is not None: + self._AUTHENTICATE = True + + if self._AUTHENTICATE: + self._login() def _login(self): (username, password) = self._get_login_info() - if username is None: - # Login is required - raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) # Log in login_form_strs = { @@ -79,10 +82,30 @@ class NiconicoIE(InfoExtractor): 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, note='Downloading video info page') - # Get flv info - flv_info_webpage = self._download_webpage( - 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, - video_id, 'Downloading flv info') + if self._AUTHENTICATE: + # Get flv info + flv_info_webpage = self._download_webpage( + 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, + video_id, 'Downloading flv info') + else: + # Get external player info + ext_player_info = self._download_webpage( + 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) + thumb_play_key = self._search_regex( + r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') + + # Get flv info + flv_info_data = compat_urllib_parse.urlencode({ + 'k': thumb_play_key, + 'v': video_id + }) + flv_info_request = compat_urllib_request.Request( + 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, + {'Content-Type': 'application/x-www-form-urlencoded'}) + flv_info_webpage = self._download_webpage( + flv_info_request, video_id, + note='Downloading flv info', errnote='Unable to download flv info') + video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] # Start extracting information From 64ed7a38f98e9b01feae757bec25b81db80b29f6 Mon Sep 17 00:00:00 2001 From: hakatashi Date: Wed, 2 Jul 2014 03:13:12 +0900 Subject: [PATCH 175/340] [niconico] Add support for channel video --- youtube_dl/extractor/niconico.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 517a72561..43d8644a4 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -37,7 +37,7 @@ class NiconicoIE(InfoExtractor): }, } - _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' + _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z][a-z])?[0-9]+)(?:.*)$' _NETRC_MACHINE = 'niconico' def _real_initialize(self): @@ -91,20 +91,18 @@ class NiconicoIE(InfoExtractor): video_format = video_extension.upper() video_thumbnail = video_info.find('.//thumbnail_url').text video_description = video_info.find('.//description').text - video_uploader_id = video_info.find('.//user_id').text video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) video_view_count = video_info.find('.//view_counter').text video_webpage_url = video_info.find('.//watch_url').text # uploader - video_uploader = video_uploader_id - url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id - try: - user_info = self._download_xml( - url, video_id, note='Downloading user information') - video_uploader = user_info.find('.//nickname').text - except ExtractorError as err: - self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err)) + # No need to fetch extra resources...new API has field for uploader's name + if video_info.find('.//ch_id') is not None: + video_uploader_id = video_info.find('.//ch_id').text + video_uploader = video_info.find('.//ch_name').text + elif video_info.find('.//user_id') is not None: + video_uploader_id = video_info.find('.//user_id').text + video_uploader = video_info.find('.//user_nickname').text return { 'id': video_id, From 93881db22a331ac7ce855c4680998aedf9a68cfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 2 Jul 2014 19:24:01 +0700 Subject: [PATCH 176/340] [anitube] Modernize --- youtube_dl/extractor/anitube.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py index 2b019daa9..31f0d417c 100644 --- a/youtube_dl/extractor/anitube.py +++ b/youtube_dl/extractor/anitube.py @@ -1,22 +1,24 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor class AnitubeIE(InfoExtractor): - IE_NAME = u'anitube.se' + IE_NAME = 'anitube.se' _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P\d+)' _TEST = { - u'url': u'http://www.anitube.se/video/36621', - u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', - u'file': u'36621.mp4', - u'info_dict': { - u'id': u'36621', - u'ext': u'mp4', - u'title': u'Recorder to Randoseru 01', + 'url': 'http://www.anitube.se/video/36621', + 'md5': '59d0eeae28ea0bc8c05e7af429998d43', + 'info_dict': { + 'id': '36621', + 'ext': 'mp4', + 'title': 'Recorder to Randoseru 01', + 'duration': 180.19, }, - u'skip': u'Blocked in the US', + 'skip': 'Blocked in the US', } def _real_extract(self, url): @@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', - webpage, u'key') + key = self._html_search_regex( + r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key') - config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, - key) + config_xml = self._download_xml( + 'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) video_title = config_xml.find('title').text + thumbnail = config_xml.find('image').text + duration = float(config_xml.find('duration').text) formats = [] video_url = config_xml.find('file') @@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor): return { 'id': video_id, 'title': video_title, + 'thumbnail': thumbnail, + 'duration': duration, 'formats': formats } From 7aeb67b39b055e9586e7ab21c108a3176cfe0203 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Jul 2014 21:08:44 +0700 Subject: [PATCH 177/340] [teachertube:user:collection] Update media regex --- youtube_dl/extractor/teachertube.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 1a438e1e4..7167a036e 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -92,23 +92,21 @@ class TeacherTubeUserIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P[0-9a-zA-Z]+)/?' + _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+.+?' + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) user_id = mobj.group('user') urls = [] webpage = self._download_webpage(url, user_id) - urls.extend(re.findall( - r'"sidebar_thumb_time">[0-9:]+\s+', - webpage)) + urls.extend(re.findall(self._MEDIA_RE, webpage)) pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] for p in pages: more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) - urls.extend(re.findall( - r'"sidebar_thumb_time">[0-9:]+\s+', - webpage)) + urls.extend(re.findall(self._MEDIA_RE, webpage)) entries = [] for url in urls: From 1e07fea200275a1230b80e405918cdeb29d1afd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 3 Jul 2014 21:11:56 +0700 Subject: [PATCH 178/340] [teachertube] Add support for new video URL format --- youtube_dl/extractor/teachertube.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 7167a036e..2c2113b14 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor): IE_NAME = 'teachertube' IE_DESC = 'teachertube.com videos' - _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/|audio/)(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P\d+)' _TESTS = [{ 'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', @@ -45,6 +45,15 @@ class TeacherTubeIE(InfoExtractor): 'title': 'PER ASPERA AD ASTRA', 'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', }, + }, { + 'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790', + 'md5': '9c79fbb2dd7154823996fc28d4a26998', + 'info_dict': { + 'id': '297790', + 'ext': 'mp4', + 'title': 'Intro Video - Schleicher', + 'description': 'Intro Video - Why to flip, how flipping will', + }, }] def _real_extract(self, url): From 6feb2d5e803dee49b2e4a8f3a7f33ca7f01f96b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jul 2014 19:21:19 +0700 Subject: [PATCH 179/340] [youtube:search_url] Update regexes --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bf0fbc924..f420b8148 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1698,14 +1698,14 @@ class YoutubeSearchURLIE(InfoExtractor): webpage = self._download_webpage(url, query) result_code = self._search_regex( - r'(?s)
    ', webpage, u'result HTML') + r'(?s)
      ', webpage, u'result HTML') part_codes = re.findall( r'(?s)

      (.*?)

      ', result_code) entries = [] for part_code in part_codes: part_title = self._html_search_regex( - r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) + [r'(?s)title="([^"]+)"', r'>([^<]+)
      '], part_code, 'item title', fatal=False) part_url_snippet = self._html_search_regex( r'(?s)href="([^"]+)"', part_code, 'item URL') part_url = compat_urlparse.urljoin( From 15ce1338b42f906a5b9f812b8f8b5287eab8a20a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jul 2014 22:05:46 +0700 Subject: [PATCH 180/340] [niconico] Extract more metadata and simplify (Closes #3181) --- youtube_dl/extractor/niconico.py | 57 +++++++++++++++++--------------- 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index 43d8644a4..31f60041c 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -8,10 +8,10 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, compat_urlparse, - compat_str, - ExtractorError, unified_strdate, + parse_duration, + int_or_none, ) @@ -30,6 +30,7 @@ class NiconicoIE(InfoExtractor): 'uploader_id': '2698420', 'upload_date': '20131123', 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', + 'duration': 33, }, 'params': { 'username': 'ydl.niconico@gmail.com', @@ -37,7 +38,7 @@ class NiconicoIE(InfoExtractor): }, } - _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z][a-z])?[0-9]+)(?:.*)$' + _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)' _NETRC_MACHINE = 'niconico' def _real_initialize(self): @@ -86,35 +87,39 @@ class NiconicoIE(InfoExtractor): video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] # Start extracting information - video_title = video_info.find('.//title').text - video_extension = video_info.find('.//movie_type').text - video_format = video_extension.upper() - video_thumbnail = video_info.find('.//thumbnail_url').text - video_description = video_info.find('.//description').text - video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) - video_view_count = video_info.find('.//view_counter').text - video_webpage_url = video_info.find('.//watch_url').text + title = video_info.find('.//title').text + extension = video_info.find('.//movie_type').text + video_format = extension.upper() + thumbnail = video_info.find('.//thumbnail_url').text + description = video_info.find('.//description').text + upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) + view_count = int_or_none(video_info.find('.//view_counter').text) + comment_count = int_or_none(video_info.find('.//comment_num').text) + duration = parse_duration(video_info.find('.//length').text) + webpage_url = video_info.find('.//watch_url').text - # uploader - # No need to fetch extra resources...new API has field for uploader's name if video_info.find('.//ch_id') is not None: - video_uploader_id = video_info.find('.//ch_id').text - video_uploader = video_info.find('.//ch_name').text + uploader_id = video_info.find('.//ch_id').text + uploader = video_info.find('.//ch_name').text elif video_info.find('.//user_id') is not None: - video_uploader_id = video_info.find('.//user_id').text - video_uploader = video_info.find('.//user_nickname').text + uploader_id = video_info.find('.//user_id').text + uploader = video_info.find('.//user_nickname').text + else: + uploader_id = uploader = None return { 'id': video_id, 'url': video_real_url, - 'title': video_title, - 'ext': video_extension, + 'title': title, + 'ext': extension, 'format': video_format, - 'thumbnail': video_thumbnail, - 'description': video_description, - 'uploader': video_uploader, - 'upload_date': video_upload_date, - 'uploader_id': video_uploader_id, - 'view_count': video_view_count, - 'webpage_url': video_webpage_url, + 'thumbnail': thumbnail, + 'description': description, + 'uploader': uploader, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'comment_count': comment_count, + 'duration': duration, + 'webpage_url': webpage_url, } From b67f1840a181e682ca0e16b74aae7fe39fe2192f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jul 2014 22:26:56 +0700 Subject: [PATCH 181/340] [niconico] Remove unused import --- youtube_dl/extractor/niconico.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index d98131271..c0c139b5d 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -8,7 +8,6 @@ from ..utils import ( compat_urllib_parse, compat_urllib_request, compat_urlparse, - ExtractorError, unified_strdate, parse_duration, int_or_none, From ba4133c9eb3ef342e2c1505e576f3fda674fe04c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 4 Jul 2014 22:30:43 +0700 Subject: [PATCH 182/340] Credit @hakatashi for #3181 #3182 --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1e01432d2..37c40cb79 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -59,6 +59,7 @@ __authors__ = ( 'Adam Thalhammer', 'Georg Jähnig', 'Ralf Haring', + 'Koki Takahashi', ) __license__ = 'Public Domain' From 49cbe7c8e3b35f9492ee1dd816ed011aa3980d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C5=AFlp=C3=A1n?= Date: Sat, 5 Jul 2014 14:42:26 +0200 Subject: [PATCH 183/340] [allocine] add extractor for allocine.fr (fixes #3189) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/allocine.py | 89 ++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 youtube_dl/extractor/allocine.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 24b046173..12cca5c2e 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -3,6 +3,7 @@ from .addanime import AddAnimeIE from .aftonbladet import AftonbladetIE from .anitube import AnitubeIE from .aol import AolIE +from .allocine import AllocineIE from .aparat import AparatIE from .appletrailers import AppleTrailersIE from .archiveorg import ArchiveOrgIE diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py new file mode 100644 index 000000000..34f0cd49b --- /dev/null +++ b/youtube_dl/extractor/allocine.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..utils import ( + compat_str, + qualities, + determine_ext, +) + + +class AllocineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?Particle|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P[0-9]+)(?:\.html)?' + + _TESTS = [{ + 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', + 'md5': '0c9fcf59a841f65635fa300ac43d8269', + 'info_dict': { + 'id': '19546517', + 'ext': 'mp4', + 'title': 'Astérix - Le Domaine des Dieux Teaser VF', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', + 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', + 'info_dict': { + 'id': '19540403', + 'ext': 'mp4', + 'title': 'Planes 2 Bande-annonce VF', + 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', + 'thumbnail': 're:http://.*\.jpg', + }, + }, { + 'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', + 'md5': '101250fb127ef9ca3d73186ff22a47ce', + 'info_dict': { + 'id': '19544709', + 'ext': 'mp4', + 'title': 'Dragons 2 - Bande annonce finale VF', + 'description': 'md5:e74a4dc750894bac300ece46c7036490', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + typ = mobj.group('typ') + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + if typ == 'film': + video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') + else: + player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player') + + player_data = json.loads(player) + video_id = compat_str(player_data['refMedia']) + + xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) + + video = xml.find('.//AcVisionVideo').attrib + quality = qualities(['ld', 'md', 'hd']) + + formats = [] + for k, v in video.items(): + if re.match(r'.+_path', k): + format_id = k.split('_')[0] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': v, + 'ext': determine_ext(v), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video['videoTitle'], + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'description': self._og_search_description(webpage), + } From 7571c02c8ad38919654d3cdd21ec567f57fe2451 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Jul 2014 11:22:44 +0200 Subject: [PATCH 184/340] [generic] Set default-search to error This prevents users from submitting bug reports where they mistyped a URL, and prevents me from getting a weird video when holding shift and thus searching for :Tds --- youtube_dl/__init__.py | 2 +- youtube_dl/extractor/generic.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 37c40cb79..31ed63fcc 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -270,7 +270,7 @@ def parseOpts(overrideArguments=None): general.add_option( '--default-search', dest='default_search', metavar='PREFIX', - help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') + help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.') general.add_option( '--ignore-config', action='store_true', diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 869efb215..f97b59845 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -383,7 +383,7 @@ class GenericIE(InfoExtractor): if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'auto_warning' + default_search = 'error' if default_search in ('auto', 'auto_warning'): if '/' in url: @@ -397,8 +397,13 @@ class GenericIE(InfoExtractor): expected=True) else: self._downloader.report_warning( - 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) + 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) + elif default_search == 'error': + raise ExtractorError( + ('%r is not a valid URL. ' + 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube' + ) % (url, url), expected=True) else: assert ':' in default_search return self.url_result(default_search + url) From 8d5797b00f2640cfc5d75ea0189e06d85a360639 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 6 Jul 2014 11:28:51 +0200 Subject: [PATCH 185/340] [YoutubeDL] Show download URL when -v is set This will allow us to debug issues like #3204 --- youtube_dl/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index dc0ba986a..3dff723b8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -993,6 +993,8 @@ class YoutubeDL(object): fd = get_suitable_downloader(info)(self, self.params) for ph in self._progress_hooks: fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) return fd.download(name, info) if info_dict.get('requested_formats') is not None: downloaded = [] From 76bafa8ffe4631405bba17eb447f366b5c8ce734 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Jul 2014 18:53:31 +0700 Subject: [PATCH 186/340] [newstube] Capture error message --- youtube_dl/extractor/newstube.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index 2fd5b8f04..a860350af 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ExtractorError class NewstubeIE(InfoExtractor): @@ -40,6 +41,10 @@ class NewstubeIE(InfoExtractor): def ns(s): return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'} + error_message = player.find(ns('./ErrorMessage')) + if error_message is not None: + raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True) + session_id = player.find(ns('./SessionId')).text media_info = player.find(ns('./Medias/MediaInfo')) title = media_info.find(ns('./Name')).text From 1fd015516e8bb276c798983c243d45b6dd5054dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 6 Jul 2014 19:32:13 +0700 Subject: [PATCH 187/340] [newstube] Replace test --- youtube_dl/extractor/newstube.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/newstube.py b/youtube_dl/extractor/newstube.py index a860350af..551bd4d7a 100644 --- a/youtube_dl/extractor/newstube.py +++ b/youtube_dl/extractor/newstube.py @@ -10,13 +10,13 @@ from ..utils import ExtractorError class NewstubeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P.+)' _TEST = { - 'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs', + 'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', 'info_dict': { - 'id': 'd156a237-a6e9-4111-a682-039995f721f1', + 'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', 'ext': 'flv', - 'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»', - 'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77', - 'duration': 20.04, + 'title': 'Телеканал CNN переместил город Славянск в Крым', + 'description': 'md5:419a8c9f03442bc0b0a794d689360335', + 'duration': 31.05, }, 'params': { # rtmp download From 459af43494bca29ee0f079965b102e55ff72c04a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 7 Jul 2014 14:10:57 +0200 Subject: [PATCH 188/340] [arte] Manually set the rtmp play_path (fix #3198) rtmpdump doesn't parse it right --- youtube_dl/extractor/arte.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index b42102f3d..9591bad8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor): formats = [{ 'forma_id': q.attrib['quality'], - 'url': q.text, + # The playpath starts at 'mp4:', if we don't manually + # split the url, rtmpdump will incorrectly parse them + 'url': q.text.split('mp4:', 1)[0], + 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], 'ext': 'flv', 'quality': 2 if q.attrib['quality'] == 'hd' else 1, } for q in config.findall('./urls/url')] From 1aac03797ee43b40a410389aa3dfa4e4b2f2918d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 7 Jul 2014 20:12:59 +0700 Subject: [PATCH 189/340] [ninegag] Fix extraction --- youtube_dl/extractor/ninegag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dl/extractor/ninegag.py index c2e7b67c7..33daa0dec 100644 --- a/youtube_dl/extractor/ninegag.py +++ b/youtube_dl/extractor/ninegag.py @@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor): webpage = self._download_webpage(url, display_id) post_view = json.loads(self._html_search_regex( - r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view')) + r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) youtube_id = post_view['videoExternalId'] title = post_view['title'] From 3941669d691b337aa4bb1b13648a0573c37abd6e Mon Sep 17 00:00:00 2001 From: azeem Date: Mon, 7 Jul 2014 23:51:02 +0530 Subject: [PATCH 190/340] [soundcloud] Adding likes support to SoundcloudUserIE --- test/test_playlists.py | 8 ++++++++ youtube_dl/extractor/soundcloud.py | 15 +++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 994b1d4b0..3a88cf270 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -137,6 +137,14 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], '9615865') self.assertTrue(len(result['entries']) >= 12) + def test_soundcloud_likes(self): + dl = FakeYDL() + ie = SoundcloudUserIE(dl) + result = ie.extract('https://soundcloud.com/the-concept-band/likes') + self.assertIsPlaylist(result) + self.assertEqual(result['id'], '9615865') + self.assertTrue(len(result['entries']) >= 1) + def test_soundcloud_playlist(self): dl = FakeYDL() ie = SoundcloudPlaylistIE(dl) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7aa100fb2..14ec9452d 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -255,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE): class SoundcloudUserIE(SoundcloudIE): - _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)(/?(tracks/)?)?(\?.*)?$' + _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P[^/]+)/?((?Ptracks|likes)/?)?(\?.*)?$' IE_NAME = 'soundcloud:user' # it's in tests/test_playlists.py @@ -264,24 +264,31 @@ class SoundcloudUserIE(SoundcloudIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) uploader = mobj.group('user') + resource = mobj.group('rsrc') + if resource is None: + resource = 'tracks' + elif resource == 'likes': + resource = 'favorites' url = 'http://soundcloud.com/%s/' % uploader resolv_url = self._resolv_url(url) user = self._download_json( resolv_url, uploader, 'Downloading user info') - base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader + base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource) entries = [] for i in itertools.count(): data = compat_urllib_parse.urlencode({ 'offset': i * 50, + 'limit': 50, 'client_id': self._CLIENT_ID, }) new_entries = self._download_json( base_url + data, uploader, 'Downloading track page %s' % (i + 1)) - entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) - if len(new_entries) < 50: + if len(new_entries) == 0: + self.to_screen('%s: End page received' % uploader) break + entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) return { '_type': 'playlist', From 6e1e0e4b5b1952b17007cf6489e0d3e2bc2a513a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 8 Jul 2014 20:22:27 +0700 Subject: [PATCH 191/340] [veoh] Skip deleted test video --- youtube_dl/extractor/veoh.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/veoh.py b/youtube_dl/extractor/veoh.py index fb132aef6..a7953a7e7 100644 --- a/youtube_dl/extractor/veoh.py +++ b/youtube_dl/extractor/veoh.py @@ -49,6 +49,7 @@ class VeohIE(InfoExtractor): 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', 'uploader': 'newsy-videos', }, + 'skip': 'This video has been deleted.', }, ] From d6aa1967ad5b91cb12b306a9797c7c5097d54472 Mon Sep 17 00:00:00 2001 From: MikeCol Date: Wed, 9 Jul 2014 12:14:53 +0200 Subject: [PATCH 192/340] GoshGay Extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/goshgay.py | 72 ++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 youtube_dl/extractor/goshgay.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 12cca5c2e..e8598a2f5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -112,6 +112,7 @@ from .generic import GenericIE from .googleplus import GooglePlusIE from .googlesearch import GoogleSearchIE from .gorillavid import GorillaVidIE +from .goshgay import GoshgayIE from .hark import HarkIE from .helsinki import HelsinkiIE from .hentaistigma import HentaiStigmaIE diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py new file mode 100644 index 000000000..3f31ec896 --- /dev/null +++ b/youtube_dl/extractor/goshgay.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urlparse, + str_to_int, + ExtractorError, +) +import json + + +class GoshgayIE(InfoExtractor): + _VALID_URL = r'^(?:https?://)www.goshgay.com/video(?P\d+?)($|/)' + _TEST = { + 'url': 'http://www.goshgay.com/video4116282', + 'md5': '268b9f3c3229105c57859e166dd72b03', + 'info_dict': { + 'id': '4116282', + 'ext': 'flv', + 'title': 'md5:089833a4790b5e103285a07337f245bf', + 'thumbnail': 're:http://.*\.jpg', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'class="video-title">

      (.+?)<', webpage, 'title') + + player_config = self._search_regex(r'jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings', + fatal=True, flags=re.S) + player_vars = json.loads(player_config.replace("'", '"')) + width = str_to_int(player_vars.get('width')) + height = str_to_int(player_vars.get('height')) + config_uri = player_vars.get('config') + + if config_uri is None: + raise ExtractorError('Missing config URI') + node = self._download_xml(config_uri, video_id, 'Downloading player config XML', + errnote='Unable to download XML') + if node is None: + raise ExtractorError('Missing config XML') + if node.tag != 'config': + raise ExtractorError('Missing config attribute') + fns = node.findall('file') + imgs = node.findall('image') + if len(fns) != 1: + raise ExtractorError('Missing media URI') + video_url = fns[0].text + if len(imgs) < 1: + thumbnail = None + else: + thumbnail = imgs[0].text + + url_comp = compat_urlparse.urlparse(url) + ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'width': width, + 'height': height, + 'thumbnail': thumbnail, + 'http_referer': ref, + 'age_limit': 18, + } From 411f691b213f12d8020547316fb6c16239732a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 9 Jul 2014 19:12:42 +0700 Subject: [PATCH 193/340] [mpora] Fix player regex --- youtube_dl/extractor/mpora.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 39d6feb98..387935d4d 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -28,7 +28,7 @@ class MporaIE(InfoExtractor): webpage = self._download_webpage(url, video_id) data_json = self._search_regex( - r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') + r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') data = json.loads(data_json) From 537ba6f3818004ef43e0067fd1be8dbd1bbeed46 Mon Sep 17 00:00:00 2001 From: pachacamac Date: Wed, 9 Jul 2014 18:21:46 +0200 Subject: [PATCH 194/340] [Vodlocker] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/vodlocker.py | 66 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 youtube_dl/extractor/vodlocker.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c3160df1e..1666aa372 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -333,6 +333,7 @@ from .vine import ( ) from .viki import VikiIE from .vk import VKIE +from .vodlocker import VodlockerIE from .vube import VubeIE from .vuclip import VuClipIE from .vulture import VultureIE diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py new file mode 100644 index 000000000..fdab0e7bf --- /dev/null +++ b/youtube_dl/extractor/vodlocker.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import time +from .common import InfoExtractor +from ..utils import ( + determine_ext, + compat_urllib_parse, + compat_urllib_request, +) + + +class VodlockerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P[0-9a-zA-Z]+)(?:\..*?)?' + + _TESTS = [{ + 'url': 'http://vodlocker.com/e8wvyzz4sl42', + 'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', + 'info_dict': { + 'id': 'e8wvyzz4sl42', + 'ext': 'mp4', + 'title': 'Germany vs Brazil', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://vodlocker.com/%s' % video_id + + webpage = self._download_webpage(url, video_id) + + fields = dict(re.findall(r'''(?x)\s*(.*?)\s* Date: Thu, 10 Jul 2014 14:49:16 +0200 Subject: [PATCH 195/340] release 2014.07.10 --- README.md | 5 +++-- youtube_dl/version.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2bea609bf..dffdaa9dc 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like. --default-search PREFIX Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large - apple". By default (with value "auto") - youtube-dl guesses. + apple". Use the value "auto" to let + youtube-dl guess. The default value "error" + just throws an error. --ignore-config Do not read configuration files. When given in the global configuration file /etc /youtube-dl.conf: do not read the user diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ab076489f..a8804b650 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.06.26' +__version__ = '2014.07.10' From b3a88780802a83686671945471d042dd864e7ccb Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:23:17 +0200 Subject: [PATCH 196/340] [youtube] Remove static signatures The always fail by now. Instead, use only automatic signature extraction --- youtube_dl/extractor/youtube.py | 50 ++------------------------------- 1 file changed, 3 insertions(+), 47 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f420b8148..15208f47f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -879,58 +879,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): if self._downloader.params.get('youtube_print_sig_code'): self._print_sig_code(func, len(s)) return func(s) - except Exception: + except Exception as e: tb = traceback.format_exc() - self._downloader.report_warning( - u'Automatic signature extraction failed: ' + tb) - - self._downloader.report_warning( - u'Warning: Falling back to static signature algorithm') + raise ExtractorError( + u'Automatic signature extraction failed: ' + tb, cause=e) return self._static_decrypt_signature( s, video_id, player_url, age_gate) - def _static_decrypt_signature(self, s, video_id, player_url, age_gate): - if age_gate: - # The videos with age protection use another player, so the - # algorithms can be different. - if len(s) == 86: - return s[2:63] + s[82] + s[64:82] + s[63] - - if len(s) == 93: - return s[86:29:-1] + s[88] + s[28:5:-1] - elif len(s) == 92: - return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] - elif len(s) == 91: - return s[84:27:-1] + s[86] + s[26:5:-1] - elif len(s) == 90: - return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] - elif len(s) == 89: - return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] - elif len(s) == 88: - return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28] - elif len(s) == 87: - return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] - elif len(s) == 86: - return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] - elif len(s) == 85: - return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] - elif len(s) == 84: - return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] - elif len(s) == 83: - return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] - elif len(s) == 82: - return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] - elif len(s) == 81: - return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] - elif len(s) == 80: - return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] - elif len(s) == 79: - return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] - - else: - raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( From 6f9d4d542f8a5f565fe7811e6d07553f4d9c69cc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:34:01 +0200 Subject: [PATCH 197/340] [youtube] Add test for new signature scheme (#3232) --- test/test_youtube_signature.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8417c55a6..6e0fa14a8 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [ 90, u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', + u'js', + u'BF51B8F76F05D81CEAED01F5ACE376131B23D830.5805F8368CB04C36C973A8CF997B774AC4B685B77', + u'2909FDCA8C5E6D92D34B34E7C7AFFD7CA57532DA.5BA2848AD58DAA15002012C7CD77187D24E048A5', + ), ] @@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, stype, sig_length, expected_sig): +def make_tfunc(url, stype, sig_input, expected_sig): basename = url.rpartition('/')[2] m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) assert m, '%r should follow URL format' % basename @@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig): with open(fn, 'rb') as testf: swfcode = testf.read() func = ie._parse_sig_swf(swfcode) - src_sig = compat_str(string.printable[:sig_length]) + src_sig = ( + compat_str(string.printable[:sig_input]) + if isinstance(sig_input, int) else sig_input) got_sig = func(src_sig) self.assertEqual(got_sig, expected_sig) From 61989fb5e9613b042c7f72d06e141242d60a1fde Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:40:02 +0200 Subject: [PATCH 198/340] [jsinterp] Remove superfluous u --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 449482d3c..d7e76713f 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -59,7 +59,7 @@ class JSInterpreter(object): if member == 'split("")': return list(val) if member == 'join("")': - return u''.join(val) + return ''.join(val) if member == 'length': return len(val) if member == 'reverse()': From c8bf86d50d65ac434c7d683c21ec4d362f0cf030 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:44:39 +0200 Subject: [PATCH 199/340] [youtube] Correct signature extraction error detection --- youtube_dl/extractor/youtube.py | 39 ++++++++++++++++----------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 15208f47f..6123e1256 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -865,27 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _decrypt_signature(self, s, video_id, player_url, age_gate=False): """Turn the encrypted s field into a working signature""" - if player_url is not None: - if player_url.startswith(u'//'): - player_url = u'https:' + player_url - try: - player_id = (player_url, len(s)) - if player_id not in self._player_cache: - func = self._extract_signature_function( - video_id, player_url, len(s) - ) - self._player_cache[player_id] = func - func = self._player_cache[player_id] - if self._downloader.params.get('youtube_print_sig_code'): - self._print_sig_code(func, len(s)) - return func(s) - except Exception as e: - tb = traceback.format_exc() - raise ExtractorError( - u'Automatic signature extraction failed: ' + tb, cause=e) + if player_url is None: + raise ExtractorError(u'Cannot decrypt signature without player_url') - return self._static_decrypt_signature( - s, video_id, player_url, age_gate) + if player_url.startswith(u'//'): + player_url = u'https:' + player_url + try: + player_id = (player_url, len(s)) + if player_id not in self._player_cache: + func = self._extract_signature_function( + video_id, player_url, len(s) + ) + self._player_cache[player_id] = func + func = self._player_cache[player_id] + if self._downloader.params.get('youtube_print_sig_code'): + self._print_sig_code(func, len(s)) + return func(s) + except Exception as e: + tb = traceback.format_exc() + raise ExtractorError( + u'Automatic signature extraction failed: ' + tb, cause=e) def _get_available_subtitles(self, video_id, webpage): try: From fc040bfd058202b0b8c9f69b12e3a2d32e8f380c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:44:56 +0200 Subject: [PATCH 200/340] [jsinterp] Prevent mis-recognitions of local functions --- youtube_dl/jsinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index d7e76713f..3bbb07704 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -99,7 +99,7 @@ class JSInterpreter(object): def extract_function(self, funcname): func_m = re.search( - (r'(?:function %s|%s\s*=\s*function)' % ( + (r'(?:function %s|[{;]%s\s*=\s*function)' % ( re.escape(funcname), re.escape(funcname))) + r'\((?P[a-z,]+)\){(?P[^}]+)}', self.code) From f64ebfe3e542e9648b8f9f268457de949d494901 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:46:08 +0200 Subject: [PATCH 201/340] [youtube] Correct signature test --- test/test_youtube_signature.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 6e0fa14a8..8d46fe108 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -36,8 +36,8 @@ _TESTS = [ ( u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', u'js', - u'BF51B8F76F05D81CEAED01F5ACE376131B23D830.5805F8368CB04C36C973A8CF997B774AC4B685B77', - u'2909FDCA8C5E6D92D34B34E7C7AFFD7CA57532DA.5BA2848AD58DAA15002012C7CD77187D24E048A5', + u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', + u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', ), ] From 391d53e1ddb55928a2aa7735487e166e582af024 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:49:41 +0200 Subject: [PATCH 202/340] release 2014.07.11 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a8804b650..d6b05892c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.10' +__version__ = '2014.07.11' From 4094b6e36d03a6230689657d87de7a58f3f0b581 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 10:57:08 +0200 Subject: [PATCH 203/340] [vodlocker] PEP8, generalization, and simplification (#3223) --- youtube_dl/extractor/common.py | 11 +++++++++-- youtube_dl/extractor/vodlocker.py | 19 +++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e4e4feef9..f1ed30704 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1,11 +1,12 @@ import base64 import hashlib import json +import netrc import os import re import socket import sys -import netrc +import time import xml.etree.ElementTree from ..utils import ( @@ -575,6 +576,13 @@ class InfoExtractor(object): else: return url + def _sleep(self, timeout, video_id, msg_template=None): + if msg_template is None: + msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' + msg = msg_template % {'video_id': video_id, 'timeout': timeout} + self.to_screen(msg) + time.sleep(timeout) + class SearchInfoExtractor(InfoExtractor): """ @@ -618,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor): @property def SEARCH_KEY(self): return self._SEARCH_KEY - diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index fdab0e7bf..dfc570930 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -28,9 +28,6 @@ class VodlockerIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - - url = 'http://vodlocker.com/%s' % video_id - webpage = self._download_webpage(url, video_id) fields = dict(re.findall(r'''(?x)\s*(.*?)\s*\s*(.*?)\s* Date: Fri, 11 Jul 2014 11:01:59 +0200 Subject: [PATCH 204/340] [goshgay] PEP8 and test for age_limit (#3220) --- youtube_dl/extractor/goshgay.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 3f31ec896..7bca21ad0 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -22,6 +22,7 @@ class GoshgayIE(InfoExtractor): 'ext': 'flv', 'title': 'md5:089833a4790b5e103285a07337f245bf', 'thumbnail': 're:http://.*\.jpg', + 'age_limit': 18, } } @@ -32,8 +33,8 @@ class GoshgayIE(InfoExtractor): webpage = self._download_webpage(url, video_id) title = self._search_regex(r'class="video-title">

      (.+?)<', webpage, 'title') - player_config = self._search_regex(r'jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings', - fatal=True, flags=re.S) + player_config = self._search_regex( + r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') player_vars = json.loads(player_config.replace("'", '"')) width = str_to_int(player_vars.get('width')) height = str_to_int(player_vars.get('height')) From 953b3586687f859d8b9fc7e8d9c155fb360ee587 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:05:16 +0200 Subject: [PATCH 205/340] [gorillavid] Add support for daclips.in (Closes #3213) --- youtube_dl/extractor/gorillavid.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index aa15cafc3..50ef54cce 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -12,7 +12,12 @@ from ..utils import ( class GorillaVidIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' + IE_DESC = 'GorillaVid.in and daclips.in' + _VALID_URL = r'''(?x) + https?://(?:www\.)? + (?:daclips\.in|gorillavid\.in)/ + (?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? + ''' _TESTS = [{ 'url': 'http://gorillavid.in/06y9juieqpmi', @@ -32,14 +37,20 @@ class GorillaVidIE(InfoExtractor): 'title': 'Say something nice', 'thumbnail': 're:http://.*\.jpg', }, + }, { + 'url': 'http://daclips.in/3rso4kdn6f9m', + 'info_dict': { + 'id': '3rso4kdn6f9m', + 'ext': 'mp4', + 'title': 'Micro Pig piglets ready on 16th July 2009', + 'thumbnail': 're:http://.*\.jpg', + }, }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - url = 'http://gorillavid.in/%s' % video_id - webpage = self._download_webpage(url, video_id) fields = dict(re.findall(r'''(?x) Date: Fri, 11 Jul 2014 11:08:36 +0200 Subject: [PATCH 206/340] [vimple] Do not fail if duration is missing --- youtube_dl/extractor/vimple.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index f3a807cd3..86344849a 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -1,11 +1,13 @@ # coding: utf-8 from __future__ import unicode_literals + import re import zlib import base64 import xml.etree.ElementTree from .common import InfoExtractor +from ..utils import int_or_none class VimpleIE(InfoExtractor): @@ -79,6 +81,6 @@ class VimpleIE(InfoExtractor): 'title': video.find('Title').text, 'formats': formats, 'thumbnail': video.find('Poster').get('url'), - 'duration': int(video.get('duration')), + 'duration': int_or_none(video.get('duration')), 'webpage_url': video.find('Share').get('videoPageUrl'), } From e93f4f7578955b2484fe1e8927a3b5dafd9d5b52 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:09:01 +0200 Subject: [PATCH 207/340] [vodlocker] Remove unused imports --- youtube_dl/extractor/vodlocker.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index dfc570930..68c59364b 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -2,10 +2,8 @@ from __future__ import unicode_literals import re -import time from .common import InfoExtractor from ..utils import ( - determine_ext, compat_urllib_parse, compat_urllib_request, ) From 1eb867f33fe8147cc959e7d2fcc0701a0489dc29 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:11:09 +0200 Subject: [PATCH 208/340] [vimple] Simplify and PEP8 --- youtube_dl/extractor/vimple.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/vimple.py b/youtube_dl/extractor/vimple.py index 86344849a..33d370e1c 100644 --- a/youtube_dl/extractor/vimple.py +++ b/youtube_dl/extractor/vimple.py @@ -1,10 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import zlib import base64 +import re import xml.etree.ElementTree +import zlib from .common import InfoExtractor from ..utils import int_or_none @@ -38,21 +38,21 @@ class VimpleIE(InfoExtractor): }, ] - # http://jsunpack-n.googlecode.com/svn-history/r63/trunk/swf.py - def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id - iframe = self._download_webpage(iframe_url, video_id, note='Downloading iframe', errnote='unable to fetch iframe') - player_url = self._html_search_regex(r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') + iframe = self._download_webpage( + iframe_url, video_id, + note='Downloading iframe', errnote='unable to fetch iframe') + player_url = self._html_search_regex( + r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') - player = self._request_webpage(player_url, video_id, note='Downloading swf player').read() + player = self._request_webpage( + player_url, video_id, note='Downloading swf player').read() - # http://stackoverflow.com/a/6804758 - # http://stackoverflow.com/a/12073686 player = zlib.decompress(player[8:]) xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) From 3d55f2806ef23d567722ee61f7bf9d0662f81639 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:11:52 +0200 Subject: [PATCH 209/340] Credit @irtusb for vimple (#3073) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 31ed63fcc..24ccc9eb8 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -60,6 +60,7 @@ __authors__ = ( 'Georg Jähnig', 'Ralf Haring', 'Koki Takahashi', + 'Ariset Llerena', ) __license__ = 'Public Domain' From 04c77a54b0542b914a979d04bcc7b86dd375f828 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:15:35 +0200 Subject: [PATCH 210/340] [tenplay] PEP8 --- youtube_dl/extractor/tenplay.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py index 449351551..8477840fc 100644 --- a/youtube_dl/extractor/tenplay.py +++ b/youtube_dl/extractor/tenplay.py @@ -5,11 +5,12 @@ import re from .common import InfoExtractor + class TenPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+' _TEST = { 'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way', - 'md5': 'c9dda6aac8f814352ad2aee8899b1612', + #'md5': 'd68703d9f73dc8fccf3320ab34202590', 'info_dict': { 'id': '2695695426001', 'ext': 'flv', @@ -17,17 +18,28 @@ class TenPlayIE(InfoExtractor): 'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.', 'timestamp': 1380150606.889, 'upload_date': '20130925', - 'uploader': 'TENplay' + 'uploader': 'TENplay', + }, + 'params': { + 'skip_download': True, # Requires rtmpdump } } - _video_fields = ["id","name","shortDescription","longDescription","creationDate","publishedDate","lastModifiedDate","customFields","videoStillURL","thumbnailURL","referenceId","length","playsTotal","playsTrailingWeek","renditions","captioning","startDate","endDate"] + _video_fields = [ + "id", "name", "shortDescription", "longDescription", "creationDate", + "publishedDate", "lastModifiedDate", "customFields", "videoStillURL", + "thumbnailURL", "referenceId", "length", "playsTotal", + "playsTrailingWeek", "renditions", "captioning", "startDate", "endDate"] def _real_extract(self, url): webpage = self._download_webpage(url, url) - video_id = self._html_search_regex(r'videoID: "(\d+?)"', webpage, 'video_id') - api_token = self._html_search_regex(r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') - title = self._html_search_regex(r'', webpage, 'title') + video_id = self._html_search_regex( + r'videoID: "(\d+?)"', webpage, 'video_id') + api_token = self._html_search_regex( + r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token') + title = self._html_search_regex( + r'', + webpage, 'title') json = self._download_json('https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (video_id, api_token, ','.join(self._video_fields)), title) @@ -50,8 +62,8 @@ class TenPlayIE(InfoExtractor): 'ext': ext, 'vcodec': rendition['videoCodec'].lower(), 'container': rendition['videoContainer'].lower(), - 'url': url - }) + 'url': url, + }) return { 'id': video_id, From cdc22cb8861e95a874f0271c84dbc6be487e03fc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:16:04 +0200 Subject: [PATCH 211/340] Credit @adammw for tenplay (#2954) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 24ccc9eb8..c1f8a401e 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -61,6 +61,7 @@ __authors__ = ( 'Ralf Haring', 'Koki Takahashi', 'Ariset Llerena', + 'Adam Malcontenti-Wilson', ) __license__ = 'Public Domain' From d96b9d40f04110f427e5bbd2dcc75aeb375291c7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:27:44 +0200 Subject: [PATCH 212/340] [gameone] Sort formats --- youtube_dl/extractor/gameone.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py index 2544ea521..b580f52fb 100644 --- a/youtube_dl/extractor/gameone.py +++ b/youtube_dl/extractor/gameone.py @@ -76,6 +76,7 @@ class GameOneIE(InfoExtractor): } for r in rendition_items ] + self._sort_formats(formats) return { 'id': video_id, From 1df0ae217055c5af5e4ca9904d9d77a41b828f86 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:29:17 +0200 Subject: [PATCH 213/340] Credit @tobidope for gameone (#2941) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c1f8a401e..e55cba9f4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -62,6 +62,7 @@ __authors__ = ( 'Koki Takahashi', 'Ariset Llerena', 'Adam Malcontenti-Wilson', + 'Tobias Bell', ) __license__ = 'Public Domain' From fada438acf7220fbff6450800833585d0b0a1843 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 11:53:28 +0200 Subject: [PATCH 214/340] release 2014.07.11.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index d6b05892c..ac3f72d5b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11' +__version__ = '2014.07.11.1' From 4e415288d73f3ec15a0b2854de79c7359d1ae6fe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 13:21:32 +0200 Subject: [PATCH 215/340] [criterion] Simplify and modernize --- youtube_dl/extractor/criterion.py | 51 ++++++++++++++++--------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index 31fe3d57b..4fb178165 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -1,40 +1,43 @@ # -*- coding: utf-8 -*- +from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import determine_ext + class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' + _VALID_URL = r'https?://www\.criterion\.com/films/(?P[0-9]+)-.+' _TEST = { - u'url': u'http://www.criterion.com/films/184-le-samourai', - u'file': u'184.mp4', - u'md5': u'bc51beba55685509883a9a7830919ec3', - u'info_dict': { - u"title": u"Le Samouraï", - u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', + 'url': 'http://www.criterion.com/films/184-le-samourai', + 'md5': 'bc51beba55685509883a9a7830919ec3', + 'info_dict': { + 'id': '184', + 'ext': 'mp4', + 'title': 'Le Samouraï', + 'description': 'md5:a2b4b116326558149bef81f76dcbb93f', } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - video_id = mobj.group(1) + video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', - webpage, 'video url') - title = self._html_search_regex(r'', - webpage, 'video title') - description = self._html_search_regex(r'', - webpage, 'video description') - thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', - webpage, 'thumbnail url') + final_url = self._search_regex( + r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') + title = self._og_search_title(webpage) + description = self._html_search_regex( + r'', + webpage, 'video description') + thumbnail = self._search_regex( + r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', + webpage, 'thumbnail url') - return {'id': video_id, - 'url' : final_url, - 'title': title, - 'ext': determine_ext(final_url), - 'description': description, - 'thumbnail': thumbnail, - } + return { + 'id': video_id, + 'url': final_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } From 38ad119f97cba871d34b057050547ba56b3e54c6 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 13:34:19 +0200 Subject: [PATCH 216/340] [screencast] Add new extractor (Fixes #3236) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/screencast.py | 48 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 youtube_dl/extractor/screencast.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index fcc7d0b58..15d2f0e2a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -248,6 +248,7 @@ from .rutube import ( from .rutv import RUTVIE from .savefrom import SaveFromIE from .scivee import SciVeeIE +from .screencast import ScreencastIE from .servingsys import ServingSysIE from .sina import SinaIE from .slideshare import SlideshareIE diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py new file mode 100644 index 000000000..f2ced39c4 --- /dev/null +++ b/youtube_dl/extractor/screencast.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_parse_qs, + compat_urllib_request, +) + + +class ScreencastIE(InfoExtractor): + _VALID_URL = r'https?://www\.screencast\.com/t/(?P[a-zA-Z0-9]+)' + _TEST = { + 'url': 'http://www.screencast.com/t/3ZEjQXlT', + 'md5': '917df1c13798a3e96211dd1561fded83', + 'info_dict': { + 'id': '3ZEjQXlT', + 'ext': 'm4v', + 'title': 'Color Measurement with Ocean Optics Spectrometers', + 'description': 'md5:240369cde69d8bed61349a199c5fb153', + 'thumbnail': 're:^https?://.*\.jpg$' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + + flash_vars_s = self._html_search_regex( + r' Date: Fri, 11 Jul 2014 13:34:48 +0200 Subject: [PATCH 217/340] release 2014.07.11.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index ac3f72d5b..7ea6e7d43 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11.1' +__version__ = '2014.07.11.2' From 40c696e5c6e01bd94ae0d5f17ef77c368588106c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 15:38:18 +0200 Subject: [PATCH 218/340] [screencast] Add suppot for more video types (#3236) --- youtube_dl/extractor/common.py | 4 +- youtube_dl/extractor/screencast.py | 69 +++++++++++++++++++++++++----- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index f1ed30704..e68657314 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -463,14 +463,14 @@ class InfoExtractor(object): def _og_search_url(self, html, **kargs): return self._og_search_property('url', html, **kargs) - def _html_search_meta(self, name, html, display_name=None, fatal=False): + def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): if display_name is None: display_name = name return self._html_search_regex( r'''(?ix)]+(?:itemprop|name|property)=["\']%s["\']) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), - html, display_name, fatal=fatal) + html, display_name, fatal=fatal, **kwargs) def _dc_search_uploader(self, html): return self._html_search_meta('dc.creator', html, 'uploader') diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index f2ced39c4..ba69739b2 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..utils import ( + ExtractorError, compat_parse_qs, compat_urllib_request, ) @@ -12,7 +13,7 @@ from ..utils import ( class ScreencastIE(InfoExtractor): _VALID_URL = r'https?://www\.screencast\.com/t/(?P[a-zA-Z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'http://www.screencast.com/t/3ZEjQXlT', 'md5': '917df1c13798a3e96211dd1561fded83', 'info_dict': { @@ -20,24 +21,70 @@ class ScreencastIE(InfoExtractor): 'ext': 'm4v', 'title': 'Color Measurement with Ocean Optics Spectrometers', 'description': 'md5:240369cde69d8bed61349a199c5fb153', - 'thumbnail': 're:^https?://.*\.jpg$' + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', } - } + }, { + 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', + 'md5': 'e8e4b375a7660a9e7e35c33973410d34', + 'info_dict': { + 'id': 'V2uXehPJa1ZI', + 'ext': 'mov', + 'title': 'The Amadeus Spectrometer', + 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/aAB3iowa', + 'md5': 'dedb2734ed00c9755761ccaee88527cd', + 'info_dict': { + 'id': 'aAB3iowa', + 'ext': 'mp4', + 'title': 'Google Earth Export', + 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - flash_vars_s = self._html_search_regex( - r'>(.*?)<', + webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage, default=None) + if description is None: + description = self._html_search_meta('description', webpage) return { 'id': video_id, From 133af9385b1a8ae593718561ab7b92cc52332016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Fri, 11 Jul 2014 16:16:30 +0200 Subject: [PATCH 219/340] Update supported formats for the --recode-video option (#3228) --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index e55cba9f4..89a2cb3e8 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -509,7 +509,7 @@ def parseOpts(overrideArguments=None): postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, - help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)') + help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, help='keeps the video file on disk after the post-processing; the video is erased by default') postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, From 00ac799b6875c14886d18328c8a6563f751127a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Jul 2014 22:04:24 +0700 Subject: [PATCH 220/340] [vine:user] Update test --- test/test_playlists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 3a88cf270..1a38a667b 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -111,7 +111,7 @@ class TestPlaylists(unittest.TestCase): ie = VineUserIE(dl) result = ie.extract('https://vine.co/Visa') self.assertIsPlaylist(result) - self.assertTrue(len(result['entries']) >= 50) + self.assertTrue(len(result['entries']) >= 47) def test_ustream_channel(self): dl = FakeYDL() From 345e37831c6d6e215986f956f5dbf0578773ed38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Jul 2014 22:08:04 +0700 Subject: [PATCH 221/340] [youtube] Update nosubtitles test --- test/test_subtitles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 5736fe581..48c302198 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -87,7 +87,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles): def test_youtube_nosubtitles(self): self.DL.expect_warning(u'video doesn\'t have subtitles') - self.url = 'sAjKT8FhjI8' + self.url = 'n5BB19UTcdA' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() From 09018e19a596f3a39bf7d871d8bb14c185b2470b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Fri, 11 Jul 2014 17:21:16 +0200 Subject: [PATCH 222/340] release 2014.07.11.3 --- README.md | 2 +- youtube_dl/version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index dffdaa9dc..bc5e0f76d 100644 --- a/README.md +++ b/README.md @@ -255,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like. 128K (default 5) --recode-video FORMAT Encode the video to another format if necessary (currently supported: - mp4|flv|ogg|webm) + mp4|flv|ogg|webm|mkv) -k, --keep-video keeps the video file on disk after the post-processing; the video is erased by default diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 7ea6e7d43..2c9591630 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11.2' +__version__ = '2014.07.11.3' From aaefb347c0177d0b3f3fe6ade08fe4657479ee4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 11 Jul 2014 22:23:00 +0700 Subject: [PATCH 223/340] [gorillavid] Fix embedded videos extraction --- youtube_dl/extractor/gorillavid.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index 50ef54cce..ca5f7c417 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -14,8 +14,8 @@ from ..utils import ( class GorillaVidIE(InfoExtractor): IE_DESC = 'GorillaVid.in and daclips.in' _VALID_URL = r'''(?x) - https?://(?:www\.)? - (?:daclips\.in|gorillavid\.in)/ + https?://(?P(?:www\.)? + (?:daclips\.in|gorillavid\.in))/ (?:embed-)?(?P[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? ''' @@ -39,6 +39,7 @@ class GorillaVidIE(InfoExtractor): }, }, { 'url': 'http://daclips.in/3rso4kdn6f9m', + 'md5': '1ad8fd39bb976eeb66004d3a4895f106', 'info_dict': { 'id': '3rso4kdn6f9m', 'ext': 'mp4', @@ -51,7 +52,7 @@ class GorillaVidIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage('http://%s/%s' % (mobj.group('host'), video_id), video_id) fields = dict(re.findall(r'''(?x) Date: Fri, 11 Jul 2014 22:52:48 +0700 Subject: [PATCH 224/340] [screencast] Add one more format and improve title extraction --- youtube_dl/extractor/screencast.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py index ba69739b2..306869e6a 100644 --- a/youtube_dl/extractor/screencast.py +++ b/youtube_dl/extractor/screencast.py @@ -43,6 +43,16 @@ class ScreencastIE(InfoExtractor): 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', } + }, { + 'url': 'http://www.screencast.com/t/X3ddTrYh', + 'md5': '669ee55ff9c51988b4ebc0877cc8b159', + 'info_dict': { + 'id': 'X3ddTrYh', + 'ext': 'wmv', + 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', + 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', + 'thumbnail': 're:^https?://.*\.(?:gif|jpg)$', + } }, ] @@ -59,6 +69,12 @@ class ScreencastIE(InfoExtractor): flash_vars_s = self._html_search_regex( r'>(.*?)<', + [r'Title: ([^<]*)', + r'class="tabSeperator">>(.*?)<'], webpage, 'title') thumbnail = self._og_search_thumbnail(webpage) description = self._og_search_description(webpage, default=None) From 678f58de4bf8c07116e4ea2255770ab0ba665c14 Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Sat, 12 Jul 2014 00:42:42 +0300 Subject: [PATCH 225/340] [firedrive] Add new extractor. Addresses #3095 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/firedrive.py | 81 +++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 youtube_dl/extractor/firedrive.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 15d2f0e2a..c215811c3 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -83,6 +83,7 @@ from .extremetube import ExtremeTubeIE from .facebook import FacebookIE from .faz import FazIE from .fc2 import FC2IE +from .firedrive import FiredriveIE from .firstpost import FirstpostIE from .firsttv import FirstTVIE from .fivemin import FiveMinIE diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py new file mode 100644 index 000000000..1d83048e8 --- /dev/null +++ b/youtube_dl/extractor/firedrive.py @@ -0,0 +1,81 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + compat_urllib_parse, + compat_urllib_request, + determine_ext, +) + + +class FiredriveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?firedrive\.com/' + \ + '(?:file|embed)/(?P[0-9a-zA-Z]+)' + _FILE_DELETED_REGEX = r'
      ' + + _TESTS = [{ + 'url': 'https://www.firedrive.com/file/FEB892FA160EBD01', + 'md5': 'd5d4252f80ebeab4dc2d5ceaed1b7970', + 'info_dict': { + 'id': 'FEB892FA160EBD01', + 'ext': 'flv', + 'title': 'bbb_theora_486kbit.flv', + 'thumbnail': 're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://firedrive.com/file/%s' % video_id + + webpage = self._download_webpage(url, video_id) + + if re.search(self._FILE_DELETED_REGEX, webpage) is not None: + raise ExtractorError(u'Video %s does not exist' % video_id, + expected=True) + + fields = dict(re.findall(r'''(?x)(.+)
      ', + webpage, 'title') + thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, + 'thumbnail', fatal=False, default="") + url = self._search_regex(r'file:\s?\'(http[^\']+)\',', + webpage, 'file url') + ext = self._search_regex(r'type:\s?\'([^\']+)\',', + webpage, 'extension', fatal=False) + + formats = [{ + 'format_id': 'sd', + 'url': url, + 'ext': ext or determine_ext(url), + 'quality': 1, + }] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': "http:" + thumbnail, + 'formats': formats, + } From 0d90e0f067842d35ec802cff4fcbd882023135fe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 12 Jul 2014 14:23:54 +0200 Subject: [PATCH 226/340] Credit @naglis for firedrive (#3242) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 89a2cb3e8..5e16a5491 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -63,6 +63,7 @@ __authors__ = ( 'Ariset Llerena', 'Adam Malcontenti-Wilson', 'Tobias Bell', + 'Naglis Jonaitis', ) __license__ = 'Public Domain' From c993c829e22cec2e1424ff45deedeecc9638bd5e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 12 Jul 2014 14:27:14 +0200 Subject: [PATCH 227/340] [firedrive] Simplify --- youtube_dl/extractor/firedrive.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py index 1d83048e8..d26145db1 100644 --- a/youtube_dl/extractor/firedrive.py +++ b/youtube_dl/extractor/firedrive.py @@ -24,7 +24,7 @@ class FiredriveIE(InfoExtractor): 'id': 'FEB892FA160EBD01', 'ext': 'flv', 'title': 'bbb_theora_486kbit.flv', - 'thumbnail': 're:http://.*\.jpg', + 'thumbnail': 're:^http://.*\.jpg$', }, }] @@ -37,7 +37,7 @@ class FiredriveIE(InfoExtractor): webpage = self._download_webpage(url, video_id) if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError(u'Video %s does not exist' % video_id, + raise ExtractorError('Video %s does not exist' % video_id, expected=True) fields = dict(re.findall(r'''(?x)(.+)', webpage, 'title') thumbnail = self._search_regex(r'image:\s?"(//[^\"]+)', webpage, - 'thumbnail', fatal=False, default="") - url = self._search_regex(r'file:\s?\'(http[^\']+)\',', - webpage, 'file url') + 'thumbnail', fatal=False) + if thumbnail is not None: + thumbnail = 'http:' + thumbnail + ext = self._search_regex(r'type:\s?\'([^\']+)\',', webpage, 'extension', fatal=False) + video_url = self._search_regex( + r'file:\s?\'(http[^\']+)\',', webpage, 'file url') formats = [{ 'format_id': 'sd', - 'url': url, - 'ext': ext or determine_ext(url), - 'quality': 1, + 'url': video_url, + 'ext': ext, }] return { 'id': video_id, 'title': title, - 'thumbnail': "http:" + thumbnail, + 'thumbnail': thumbnail, 'formats': formats, } From 34dbcb8505897ffc91197e6db909bf38d390475e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 12 Jul 2014 22:08:33 +0700 Subject: [PATCH 228/340] [ndr] Replace 404 test --- youtube_dl/extractor/ndr.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 3d6096e46..94d5ba982 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -18,15 +18,15 @@ class NDRIE(InfoExtractor): _TESTS = [ { - 'url': 'http://www.ndr.de/fernsehen/sendungen/markt/markt7959.html', - 'md5': 'e7a6079ca39d3568f4996cb858dd6708', + 'url': 'http://www.ndr.de/fernsehen/media/dienordreportage325.html', + 'md5': '4a4eeafd17c3058b65f0c8f091355855', 'note': 'Video file', 'info_dict': { - 'id': '7959', + 'id': '325', 'ext': 'mp4', - 'title': 'Markt - die ganze Sendung', - 'description': 'md5:af9179cf07f67c5c12dc6d9997e05725', - 'duration': 2655, + 'title': 'Blaue Bohnen aus Blocken', + 'description': 'md5:190d71ba2ccddc805ed01547718963bc', + 'duration': 1715, }, }, { From 81650f95e2d28f4acc8a864c5221f1e95f75adda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 13 Jul 2014 04:03:22 +0700 Subject: [PATCH 229/340] [ruhd] Add extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ruhd.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 youtube_dl/extractor/ruhd.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c215811c3..e89a83e32 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -240,6 +240,7 @@ from .rtbf import RTBFIE from .rtlnow import RTLnowIE from .rts import RTSIE from .rtve import RTVEALaCartaIE +from .ruhd import RUHDIE from .rutube import ( RutubeIE, RutubeChannelIE, diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py new file mode 100644 index 000000000..55b58e5e6 --- /dev/null +++ b/youtube_dl/extractor/ruhd.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RUHDIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P\d+)' + _TEST = { + 'url': 'http://www.ruhd.ru/play.php?vid=207', + 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', + 'info_dict': { + 'id': '207', + 'ext': 'divx', + 'title': 'КОТ бааааам', + 'description': 'классный кот)', + 'thumbnail': 're:^http://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'([^<]+)   RUHD.ru - Видео Высокого качества №1 в России!', webpage, 'title') + description = self._html_search_regex( + r'(?s)
      (.+?)', webpage, 'description', fatal=False) + thumbnail = self._html_search_regex( + r' Date: Thu, 10 Jul 2014 04:10:02 +0200 Subject: [PATCH 230/340] [ReverbNation] Add new IE - closes #2250 --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/reverbnation.py | 45 ++++++++++++++++++++++++++++ youtube_dl/utils.py | 2 +- 3 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 youtube_dl/extractor/reverbnation.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e89a83e32..a03f9d3ad 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -232,6 +232,7 @@ from .radiofrance import RadioFranceIE from .rai import RaiIE from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE +from .reverbnation import ReverbNationIE from .ringtv import RingTVIE from .ro220 import Ro220IE from .rottentomatoes import RottenTomatoesIE diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py new file mode 100644 index 000000000..49cf427a1 --- /dev/null +++ b/youtube_dl/extractor/reverbnation.py @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +import re +import time + +from .common import InfoExtractor +from ..utils import strip_jsonp + + +class ReverbNationIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' + _TESTS = [{ + 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', + 'file': '16965047.mp3', + 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', + 'info_dict': { + "title": "MONA LISA", + "uploader": "ALKILADOS", + "uploader_id": 216429, + "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + song_id = mobj.group('id') + + api_res = self._download_json( + 'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' + % (song_id, int(time.time() * 1000)), + song_id, + transform_source=strip_jsonp, + note='Downloading information of song %s' % song_id + ) + + return { + 'id': song_id, + 'title': api_res.get('name'), + 'url': api_res.get('url'), + 'uploader': api_res.get('artist', {}).get('name'), + 'uploader_id': api_res.get('artist', {}).get('id'), + 'thumbnail': api_res.get('image', api_res.get('thumbnail')), + 'ext': 'mp3', + 'vcodec': 'none', + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 2cba2bfc1..a2890b764 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1428,7 +1428,7 @@ US_RATINGS = { def strip_jsonp(code): - return re.sub(r'(?s)^[a-zA-Z_]+\s*\(\s*(.*)\);\s*?\s*$', r'\1', code) + return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\)\s*?\s*$', r'\1', code) def qualities(quality_ids): From 6a46dc8db7c5d71107cc555a0f178c7c26c109d6 Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Sun, 13 Jul 2014 12:48:30 +1000 Subject: [PATCH 231/340] Add southpark.cc.com to southpark IE --- youtube_dl/extractor/southparkstudios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index aea8e6439..e2df242c5 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -5,7 +5,7 @@ from .mtv import MTVServicesInfoExtractor class SouthParkStudiosIE(MTVServicesInfoExtractor): IE_NAME = 'southparkstudios.com' - _VALID_URL = r'https?://(www\.)?(?Psouthparkstudios\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' + _VALID_URL = r'https?://(www\.)?(?P(?:southpark\.cc|southparkstudios)\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' From b1298d8e064e3c1d31bdfffe8a3b5cfed8b0b61d Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Sun, 13 Jul 2014 21:15:18 +1000 Subject: [PATCH 232/340] Test for colon in mgid --- youtube_dl/extractor/mtv.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index af9490ccc..228b42d2b 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -158,6 +158,9 @@ class MTVServicesInfoExtractor(InfoExtractor): if mgid.endswith('.swf'): mgid = mgid[:-4] except RegexNotFoundError: + mgid = None + + if mgid is None or ':' not in mgid: mgid = self._search_regex( [r'data-mgid="(.*?)"', r'swfobject.embedSWF\(".*?(mgid:.*?)"'], webpage, u'mgid') From 3804b012760dcc512322b49c7ae1dc4b8231b0db Mon Sep 17 00:00:00 2001 From: Adam Malcontenti-Wilson Date: Sun, 13 Jul 2014 21:29:04 +1000 Subject: [PATCH 233/340] Update test --- youtube_dl/extractor/southparkstudios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southparkstudios.py index e2df242c5..6955269f7 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southparkstudios.py @@ -14,7 +14,7 @@ class SouthParkStudiosIE(MTVServicesInfoExtractor): 'info_dict': { 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', - 'title': 'Bat Daded', + 'title': 'South Park|Bat Daded', 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', }, }] From 10d00a756aa79a5f5e56ea75fd8d80aff3cb2b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 13 Jul 2014 14:08:23 +0200 Subject: [PATCH 234/340] rename southparkstudios.py to southpark.py And make the extractor only recognize southpark.cc.com urls, the old urls are redirected. --- youtube_dl/extractor/__init__.py | 4 ++-- .../extractor/{southparkstudios.py => southpark.py} | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) rename youtube_dl/extractor/{southparkstudios.py => southpark.py} (75%) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index a03f9d3ad..e49ac3e52 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -270,8 +270,8 @@ from .soundcloud import ( SoundcloudPlaylistIE ) from .soundgasm import SoundgasmIE -from .southparkstudios import ( - SouthParkStudiosIE, +from .southpark import ( + SouthParkIE, SouthparkDeIE, ) from .space import SpaceIE diff --git a/youtube_dl/extractor/southparkstudios.py b/youtube_dl/extractor/southpark.py similarity index 75% rename from youtube_dl/extractor/southparkstudios.py rename to youtube_dl/extractor/southpark.py index 6955269f7..c20397b3d 100644 --- a/youtube_dl/extractor/southparkstudios.py +++ b/youtube_dl/extractor/southpark.py @@ -3,14 +3,14 @@ from __future__ import unicode_literals from .mtv import MTVServicesInfoExtractor -class SouthParkStudiosIE(MTVServicesInfoExtractor): - IE_NAME = 'southparkstudios.com' - _VALID_URL = r'https?://(www\.)?(?P(?:southpark\.cc|southparkstudios)\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' +class SouthParkIE(MTVServicesInfoExtractor): + IE_NAME = 'southpark.cc.com' + _VALID_URL = r'https?://(www\.)?(?Psouthpark\.cc\.com/(clips|full-episodes)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' _TESTS = [{ - 'url': 'http://www.southparkstudios.com/clips/104437/bat-daded#tab=featured', + 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', 'info_dict': { 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', @@ -20,7 +20,7 @@ class SouthParkStudiosIE(MTVServicesInfoExtractor): }] -class SouthparkDeIE(SouthParkStudiosIE): +class SouthparkDeIE(SouthParkIE): IE_NAME = 'southpark.de' _VALID_URL = r'https?://(www\.)?(?Psouthpark\.de/(clips|alle-episoden)/(?P.+?)(\?|#|$))' _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' From 9dcea3998565838af1a0821929d7d149ae658971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 13 Jul 2014 14:38:26 +0200 Subject: [PATCH 235/340] [tlc.de] If the url contains a fragment, use if in the iframe url (reported in #2748) The fragment is used in the webpage for selecting different videos. --- youtube_dl/extractor/tlc.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index ad175b83e..d848ee186 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from .brightcove import BrightcoveIE from .discovery import DiscoveryIE +from ..utils import compat_urlparse class TlcIE(DiscoveryIE): @@ -51,6 +52,10 @@ class TlcDeIE(InfoExtractor): # Otherwise we don't get the correct 'BrightcoveExperience' element, # example: http://www.tlc.de/sendungen/cake-boss/videos/cake-boss-cannoli-drama/ iframe_url = iframe_url.replace('.htm?', '.php?') + url_fragment = compat_urlparse.urlparse(url).fragment + if url_fragment: + # Since the fragment is not send to the server, we always get the same iframe + iframe_url = re.sub(r'playlist=(\d+)', 'playlist=%s' % url_fragment, iframe_url) iframe = self._download_webpage(iframe_url, title) return { From 76233cda34a3795b405cd0b2ded14fc38930263f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 14 Jul 2014 00:38:10 +0700 Subject: [PATCH 236/340] [pyvideo] Fix title extraction --- youtube_dl/extractor/pyvideo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py index 0bc0859b4..6d5732d45 100644 --- a/youtube_dl/extractor/pyvideo.py +++ b/youtube_dl/extractor/pyvideo.py @@ -46,7 +46,7 @@ class PyvideoIE(InfoExtractor): return self.url_result(m_youtube.group(1), 'Youtube') title = self._html_search_regex( - r'
      .*?([^>]+?)

      ', + r'
      \s*]*)?>([^>]+?)

      ', webpage, 'title', flags=re.DOTALL) video_url = self._search_regex( [r'Download.*? Date: Mon, 14 Jul 2014 00:41:23 +0200 Subject: [PATCH 237/340] Fix utils.strip_jsonp --- youtube_dl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a2890b764..64a9618ca 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1428,7 +1428,7 @@ US_RATINGS = { def strip_jsonp(code): - return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\)\s*?\s*$', r'\1', code) + return re.sub(r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?\s*$', r'\1', code) def qualities(quality_ids): From b1b01841afac9b65b706c3436a5717b603458491 Mon Sep 17 00:00:00 2001 From: Charles Chen Date: Mon, 14 Jul 2014 11:00:55 -0700 Subject: [PATCH 238/340] [MLB] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/mlb.py | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/mlb.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 15d2f0e2a..f75939a05 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -169,6 +169,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mixcloud import MixcloudIE +from .mlb import MlbIE from .mpora import MporaIE from .mofosex import MofosexIE from .mooshare import MooshareIE diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py new file mode 100644 index 000000000..2b500bdff --- /dev/null +++ b/youtube_dl/extractor/mlb.py @@ -0,0 +1,67 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class MlbIE(InfoExtractor): + _VALID_URL = r'http?://m\.mlb\.com/video/topic/[0-9]+/v(?Pn?\d+)/.*$' + _TEST = { + 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', + 'md5': u'd9c022c10d21f849f49c05ae12a8a7e9', + 'info_dict': { + 'id': '34496663', + 'ext': 'mp4', + 'format': 'mp4', + 'description': "7/11/14: Giancarlo Stanton practices for the Home Run Derby prior to the game against the Mets", + 'title': "Stanton prepares for Derby", + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage, default=video_id) + description = self._html_search_regex(r'', webpage, 'description', fatal=False) + thumbnail = self._html_search_regex(r'', webpage, 'image', fatal=False) + + # use the thumbnail URL to find the folder that contains the videos + _image_url = r'http://mediadownloads.mlb.com/mlbam/(?P<_date>n?.+)/images/.*$' + bobj = re.match(_image_url, thumbnail) + datestr = bobj.group('_date') + base_url = 'http://mediadownloads.mlb.com/mlbam/' + datestr + filespage = self._download_webpage(base_url, video_id) + + # Try 1800K, 1500K, 1200K, 600K, then 300K videos + video = self._html_search_regex(r'
    1. Date: Tue, 15 Jul 2014 19:18:06 +0700 Subject: [PATCH 239/340] [soundcloud] Replace 404 test --- youtube_dl/extractor/soundcloud.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 14ec9452d..8a77c1370 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -81,16 +81,16 @@ class SoundcloudIE(InfoExtractor): }, # downloadable song { - 'url': 'https://soundcloud.com/simgretina/just-your-problem-baby-1', - 'md5': '56a8b69568acaa967b4c49f9d1d52d19', + 'url': 'https://soundcloud.com/oddsamples/bus-brakes', + 'md5': 'fee7b8747b09bb755cefd4b853e7249a', 'info_dict': { - 'id': '105614606', + 'id': '128590877', 'ext': 'wav', - 'title': 'Just Your Problem Baby (Acapella)', - 'description': 'Vocals', - 'uploader': 'Sim Gretina', - 'upload_date': '20130815', - #'duration': 42, + 'title': 'Bus Brakes', + 'description': 'md5:0170be75dd395c96025d210d261c784e', + 'uploader': 'oddsamples', + 'upload_date': '20140109', + 'duration': 17, }, }, ] From bd1f325b427eaea944b4b01ef4ee7c3559caac5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 15 Jul 2014 19:32:42 +0700 Subject: [PATCH 240/340] [tutv] Replace 404 test and modernize --- youtube_dl/extractor/tutv.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py index c980153ec..d516b6427 100644 --- a/youtube_dl/extractor/tutv.py +++ b/youtube_dl/extractor/tutv.py @@ -1,21 +1,21 @@ from __future__ import unicode_literals + import base64 import re from .common import InfoExtractor -from ..utils import ( - compat_parse_qs, -) +from ..utils import compat_parse_qs class TutvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P[^/?]+)' _TEST = { - 'url': 'http://tu.tv/videos/noah-en-pabellon-cuahutemoc', - 'file': '2742556.flv', - 'md5': '5eb766671f69b82e528dc1e7769c5cb2', + 'url': 'http://tu.tv/videos/robots-futbolistas', + 'md5': '627c7c124ac2a9b5ab6addb94e0e65f7', 'info_dict': { - 'title': 'Noah en pabellon cuahutemoc', + 'id': '2973058', + 'ext': 'flv', + 'title': 'Robots futbolistas', }, } @@ -26,10 +26,9 @@ class TutvIE(InfoExtractor): webpage = self._download_webpage(url, video_id) internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID') - data_url = 'http://tu.tv/flvurl.php?codVideo=' + str(internal_id) - data_content = self._download_webpage(data_url, video_id, note='Downloading video info') - data = compat_parse_qs(data_content) - video_url = base64.b64decode(data['kpt'][0]).decode('utf-8') + data_content = self._download_webpage( + 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info') + video_url = base64.b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8') return { 'id': internal_id, From ad25aee2458527c38bdd3a8f1dec318ea807052c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 15 Jul 2014 22:46:39 +0200 Subject: [PATCH 241/340] [youtube & jsinterp] Fix signature extraction (fixes #3255) Some functions are defined now inside an object, the jsinterp will search its definition if the variable is not defined in the local namespace. --- test/test_youtube_signature.py | 6 +++++ youtube_dl/jsinterp.py | 40 +++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8d46fe108..d95533959 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [ 90, u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', + u'js', + 84, + u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', + ), ( u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', u'js', diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 3bbb07704..ae5bca2e6 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -11,6 +11,7 @@ class JSInterpreter(object): def __init__(self, code): self.code = code self._functions = {} + self._objects = {} def interpret_statement(self, stmt, local_vars, allow_recursion=20): if allow_recursion < 0: @@ -55,7 +56,19 @@ class JSInterpreter(object): m = re.match(r'^(?P[a-z]+)\.(?P.*)$', expr) if m: member = m.group('member') - val = local_vars[m.group('in')] + variable = m.group('in') + + if variable not in local_vars: + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] + key, args = member.split('(', 1) + args = args.strip(')') + argvals = [int(v) if v.isdigit() else local_vars[v] + for v in args.split(',')] + return obj[key](argvals) + + val = local_vars[variable] if member == 'split("")': return list(val) if member == 'join("")': @@ -97,6 +110,25 @@ class JSInterpreter(object): return self._functions[fname](argvals) raise ExtractorError('Unsupported JS expression %r' % expr) + def extract_object(self, objname): + obj = {} + obj_m = re.search( + (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + + r'\s*(?P([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + + r'\}\s*;', + self.code) + fields = obj_m.group('fields') + # Currently, it only supports function definitions + fields_m = re.finditer( + r'(?P[a-zA-Z$]+)\s*:\s*function' + r'\((?P[a-z,]+)\){(?P[^}]+)}', + fields) + for f in fields_m: + argnames = f.group('args').split(',') + obj[f.group('key')] = self.build_function(argnames, f.group('code')) + + return obj + def extract_function(self, funcname): func_m = re.search( (r'(?:function %s|[{;]%s\s*=\s*function)' % ( @@ -107,10 +139,12 @@ class JSInterpreter(object): raise ExtractorError('Could not find JS function %r' % funcname) argnames = func_m.group('args').split(',') + return self.build_function(argnames, func_m.group('code')) + + def build_function(self, argnames, code): def resf(args): local_vars = dict(zip(argnames, args)) - for stmt in func_m.group('code').split(';'): + for stmt in code.split(';'): res = self.interpret_statement(stmt, local_vars) return res return resf - From 172240c0a40f44d2aa384c512cc65c7e4c9e3660 Mon Sep 17 00:00:00 2001 From: Charles Chen Date: Tue, 15 Jul 2014 13:55:23 -0700 Subject: [PATCH 242/340] Switched to use media detail XML to extract video URL --- youtube_dl/extractor/mlb.py | 57 +++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 2b500bdff..61ba58843 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -28,37 +28,44 @@ class MlbIE(InfoExtractor): title = self._og_search_title(webpage, default=video_id) description = self._html_search_regex(r'', webpage, 'description', fatal=False) thumbnail = self._html_search_regex(r'', webpage, 'image', fatal=False) + + # use the video_id to find the Media detail XML + id_len = len(video_id) + _mediadetail_url = 'http://m.mlb.com/gen/multimedia/detail/'+video_id[id_len-3]+'/'+video_id[id_len-2]+'/'+video_id[id_len-1]+'/'+video_id+'.xml' - # use the thumbnail URL to find the folder that contains the videos - _image_url = r'http://mediadownloads.mlb.com/mlbam/(?P<_date>n?.+)/images/.*$' - bobj = re.match(_image_url, thumbnail) - datestr = bobj.group('_date') - base_url = 'http://mediadownloads.mlb.com/mlbam/' + datestr - filespage = self._download_webpage(base_url, video_id) - - # Try 1800K, 1500K, 1200K, 600K, then 300K videos - video = self._html_search_regex(r'
    2. Date: Tue, 15 Jul 2014 22:59:12 +0200 Subject: [PATCH 243/340] release 2014.07.15 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2c9591630..4d606c3d2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11.3' +__version__ = '2014.07.15' From 66aa382eae9342506db64ce3328a009fd3f06d5c Mon Sep 17 00:00:00 2001 From: Naglis Jonaitis Date: Wed, 16 Jul 2014 02:07:20 +0300 Subject: [PATCH 244/340] [sockshare] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/sockshare.py | 77 +++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 youtube_dl/extractor/sockshare.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e49ac3e52..f3575b6c9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -262,6 +262,7 @@ from .smotri import ( SmotriUserIE, SmotriBroadcastIE, ) +from .sockshare import SockshareIE from .sohu import SohuIE from .soundcloud import ( SoundcloudIE, diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py new file mode 100644 index 000000000..cbf2d7abe --- /dev/null +++ b/youtube_dl/extractor/sockshare.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from ..utils import ( + ExtractorError, + compat_urllib_parse, + compat_urllib_request, + determine_ext, +) +import re + +from .common import InfoExtractor + + +class SockshareIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?sockshare\.com/file/(?P[0-9A-Za-z]+)' + _FILE_DELETED_REGEX = r'This file doesn\'t exist, or has been removed\.' + _TEST = { + 'url': 'http://www.sockshare.com/file/437BE28B89D799D7', + 'md5': '9d0bf1cfb6dbeaa8d562f6c97506c5bd', + 'info_dict': { + 'id': '437BE28B89D799D7', + 'title': 'big_buck_bunny_720p_surround.avi', + 'ext': 'avi', + 'thumbnail': 're:^http://.*\.jpg$', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + url = 'http://sockshare.com/file/%s' % video_id + webpage = self._download_webpage(url, video_id) + + if re.search(self._FILE_DELETED_REGEX, webpage) is not None: + raise ExtractorError(u'Video %s does not exist' % video_id, + expected=True) + + confirm_hash = self._html_search_regex(r'''(?x)(.+)', webpage, 'title') + thumbnail = self._html_search_regex(r' Date: Wed, 16 Jul 2014 20:40:28 +0700 Subject: [PATCH 245/340] [mlb] Extract more metadata and all formats, provide more tests --- youtube_dl/extractor/__init__.py | 2 +- youtube_dl/extractor/mlb.py | 132 +++++++++++++++++++------------ 2 files changed, 81 insertions(+), 53 deletions(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 14133c315..c5961cab9 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -170,7 +170,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mixcloud import MixcloudIE -from .mlb import MlbIE +from .mlb import MLBIE from .mpora import MporaIE from .mofosex import MofosexIE from .mooshare import MooshareIE diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 61ba58843..18ab2c135 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -3,72 +3,100 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, + find_xpath_attr, +) -class MlbIE(InfoExtractor): - _VALID_URL = r'http?://m\.mlb\.com/video/topic/[0-9]+/v(?Pn?\d+)/.*$' - _TEST = { - 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', - 'md5': u'd9c022c10d21f849f49c05ae12a8a7e9', - 'info_dict': { - 'id': '34496663', - 'ext': 'mp4', - 'format': 'mp4', - 'description': "7/11/14: Giancarlo Stanton practices for the Home Run Derby prior to the game against the Mets", - 'title': "Stanton prepares for Derby", +class MLBIE(InfoExtractor): + _VALID_URL = r'http?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?Pn?\d+)' + _TESTS = [ + { + 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', + 'md5': 'd9c022c10d21f849f49c05ae12a8a7e9', + 'info_dict': { + 'id': '34496663', + 'ext': 'mp4', + 'title': 'Stanton prepares for Derby', + 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57', + 'duration': 46, + 'timestamp': 1405105800, + 'upload_date': '20140711', + 'thumbnail': 're:^https?://.*\.jpg$', + }, }, - } + { + 'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby', + 'md5': '0e6e73d509321e142409b695eadd541f', + 'info_dict': { + 'id': '34578115', + 'ext': 'mp4', + 'title': 'Cespedes repeats as Derby champ', + 'description': 'md5:08df253ce265d4cf6fb09f581fafad07', + 'duration': 488, + 'timestamp': 1405399936, + 'upload_date': '20140715', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, + { + 'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance', + 'md5': 'b8fd237347b844365d74ea61d4245967', + 'info_dict': { + 'id': '34577915', + 'ext': 'mp4', + 'title': 'Bautista on Home Run Derby', + 'description': 'md5:b80b34031143d0986dddc64a8839f0fb', + 'duration': 52, + 'timestamp': 1405390722, + 'upload_date': '20140715', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, + ] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - webpage = self._download_webpage(url, video_id) + detail = self._download_xml( + 'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml' + % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id) - title = self._og_search_title(webpage, default=video_id) - description = self._html_search_regex(r'', webpage, 'description', fatal=False) - thumbnail = self._html_search_regex(r'', webpage, 'image', fatal=False) + title = detail.find('./headline').text + description = detail.find('./big-blurb').text + duration = parse_duration(detail.find('./duration').text) + timestamp = parse_iso8601(detail.attrib['date'][:-5]) - # use the video_id to find the Media detail XML - id_len = len(video_id) - _mediadetail_url = 'http://m.mlb.com/gen/multimedia/detail/'+video_id[id_len-3]+'/'+video_id[id_len-2]+'/'+video_id[id_len-1]+'/'+video_id+'.xml' - - mediadetails = self._download_xml(_mediadetail_url, video_id, "Downloading media detail...") - has1500K = 0 - has1200K = 0 - has600K = 0 - # loop through the list of url's and only get the highest quality MP4 content - for element in mediadetails.findall('url'): - scenario = element.attrib['playback_scenario'] - if scenario.startswith(u'FLASH'): - if scenario.startswith(u'FLASH_1800K'): - video_url = element.text - # 1800K is the current highest quality video on MLB.com - break - else: - if scenario.startswith(u'FLASH_1500K'): - video_url = element.text - has1500K = 1 - else: - if (scenario.startswith(u'FLASH_1200K') and not has1500K): - video_url = element.text - has1200K = 1 - else: - if (scenario.startswith(u'FLASH_600K') and not has1200K): - video_url = element.text - has600K = 1 - else: - if (scenario.startswith(u'FLASH_300K') and not has600K): - video_url = element.text + thumbnail = find_xpath_attr( + detail, './thumbnailScenarios/thumbnailScenario', 'type', '45').text + + formats = [] + for media_url in detail.findall('./url'): + playback_scenario = media_url.attrib['playback_scenario'] + fmt = { + 'url': media_url.text, + 'format_id': playback_scenario, + } + m = re.search(r'(?P\d+)K_(?P\d+)X(?P\d+)', playback_scenario) + if m: + fmt.update({ + 'vbr': int(m.group('vbr')) * 1000, + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + formats.append(fmt) + + self._sort_formats(formats) return { 'id': video_id, - 'url': video_url, - 'extractor': 'mlb', - 'webpage_url': url, 'title': title, - 'ext': 'mp4', - 'format': 'mp4', 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, 'thumbnail': thumbnail, } From 43f0537c06384b9b97235a93ea39649ee3de4d45 Mon Sep 17 00:00:00 2001 From: hassaanaliw Date: Wed, 16 Jul 2014 18:45:42 +0500 Subject: [PATCH 246/340] [cracked] Add new extractor --- youtube_dl/extractor/__init__.py | 2 ++ youtube_dl/extractor/cracked.py | 46 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 youtube_dl/extractor/cracked.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e49ac3e52..78b95c2a5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -52,6 +52,7 @@ from .cnn import ( from .collegehumor import CollegeHumorIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .condenast import CondeNastIE +from .cracked import CrackedIE from .criterion import CriterionIE from .crunchyroll import CrunchyrollIE from .cspan import CSpanIE @@ -396,6 +397,7 @@ from .youtube import ( YoutubeUserIE, YoutubeWatchLaterIE, ) + from .zdf import ZDFIE diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py new file mode 100644 index 000000000..37c0f7ffb --- /dev/null +++ b/youtube_dl/extractor/cracked.py @@ -0,0 +1,46 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +class CrackedIE(InfoExtractor): + _VALID_URL = r'http?://.*?\.cracked\.com/video_+(?P.*)_.*' + _TEST = { + 'url': 'http://www.cracked.com/video_18803_4-social-criticisms-hidden-in-sonic-hedgehog-games.html', + + 'info_dict': { + 'id': '18803', + 'ext': 'mp4', + 'title': "4 Social Criticisms Hidden in 'Sonic the Hedgehog' Games | Cracked.com", + 'height': 375, + 'width': 666, + + + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._search_regex(r'(.*?)',webpage,'title') + video_url = self._search_regex(r'var CK_vidSrc = "+(.*)"',webpage,'url') + width = self._search_regex(r'width="(.*?)"',webpage,'width') + height = re.findall(r'height="(.*?)"',webpage)[1] + + + + + return { + 'url':video_url, + 'id': video_id, + 'ext':'mp4', + 'title':title, + 'height':int(height), + 'width':int(width) + + + } \ No newline at end of file From 3b09757bacf4518b76ba6fef071ad8e65d6faa19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 16 Jul 2014 21:03:30 +0700 Subject: [PATCH 247/340] Credit @chaochichen for mlb (#3252) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 5e16a5491..6e2359b28 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -64,6 +64,7 @@ __authors__ = ( 'Adam Malcontenti-Wilson', 'Tobias Bell', 'Naglis Jonaitis', + 'Charles Chen', ) __license__ = 'Public Domain' From d8894e24a4f123c1fe84112693047e638b7161df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 17 Jul 2014 01:57:38 +0700 Subject: [PATCH 248/340] [rtbf] Fix data video regex --- youtube_dl/extractor/rtbf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py index 205f8a167..dce64e151 100644 --- a/youtube_dl/extractor/rtbf.py +++ b/youtube_dl/extractor/rtbf.py @@ -30,7 +30,7 @@ class RTBFIE(InfoExtractor): page = self._download_webpage('https://www.rtbf.be/video/embed?id=%s' % video_id, video_id) data = json.loads(self._html_search_regex( - r'
      Date: Wed, 16 Jul 2014 22:35:09 +0200 Subject: [PATCH 249/340] [comedycentral] Recognize 'full-episodes' urls (fixes #3277) --- youtube_dl/extractor/comedycentral.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 8af0abade..3c0ce7859 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -15,7 +15,7 @@ from ..utils import ( class ComedyCentralIE(MTVServicesInfoExtractor): _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/ - (video-clips|episodes|cc-studios|video-collections) + (video-clips|episodes|cc-studios|video-collections|full-episodes) /(?P.*)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' From b4c538b02b12bfc1443e724a137aa6d54a02cc9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 16 Jul 2014 22:37:01 +0200 Subject: [PATCH 250/340] [comedycentral] Only recognize the cc.com domain The old comedycentral.com urls redirect to the new urls. --- youtube_dl/extractor/comedycentral.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py index 3c0ce7859..c81ce5a96 100644 --- a/youtube_dl/extractor/comedycentral.py +++ b/youtube_dl/extractor/comedycentral.py @@ -14,13 +14,13 @@ from ..utils import ( class ComedyCentralIE(MTVServicesInfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)?(comedycentral|cc)\.com/ + _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ (video-clips|episodes|cc-studios|video-collections|full-episodes) /(?P<title>.*)''' _FEED_URL = 'http://comedycentral.com/feeds/mrss/' _TEST = { - 'url': 'http://www.comedycentral.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', + 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', 'md5': 'c4f48e9eda1b16dd10add0744344b6d8', 'info_dict': { 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', From ca14211e93f3800893d5519f370348ba2db84ddc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 17 Jul 2014 09:27:06 +0200 Subject: [PATCH 251/340] [adultswim] Simplify (closes #2952) --- youtube_dl/extractor/adultswim.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index ca1bfbdc2..fdaecdec1 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -57,8 +57,6 @@ class AdultSwimIE(InfoExtractor): ] } - _available_formats = ['150', '640', '3500'] - _video_extensions = { '3500': 'flv', '640': 'mp4', @@ -78,7 +76,7 @@ class AdultSwimIE(InfoExtractor): webpage = self._download_webpage(url, video_path) episode_id = self._html_search_regex(r'<link rel="video_src" href="http://i\.adultswim\.com/adultswim/adultswimtv/tools/swf/viralplayer.swf\?id=([0-9a-f]+?)"\s*/?\s*>', webpage, 'episode_id') - title = self._html_search_regex(r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>', webpage, 'title') + title = self._og_search_title(webpage) index_url = 'http://asfix.adultswim.com/asfix-svc/episodeSearch/getEpisodesByIDs?networkName=AS&ids=%s' % episode_id idoc = self._download_xml(index_url, title, 'Downloading episode index', 'Unable to download episode index') From d9222264a8b4adcbe16286d61404acf67e0dcfa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 17 Jul 2014 09:31:48 +0200 Subject: [PATCH 252/340] [adultswim] The bitrate must be an integer or None (reported in #2952) --- youtube_dl/extractor/adultswim.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py index fdaecdec1..a00bfcb35 100644 --- a/youtube_dl/extractor/adultswim.py +++ b/youtube_dl/extractor/adultswim.py @@ -110,7 +110,8 @@ class AdultSwimIE(InfoExtractor): 'format_id': '%s-%s' % (bitrate, type), 'url': file_el.text, 'ext': self._video_extensions.get(bitrate, 'mp4'), - 'tbr': bitrate, + # The bitrate may not be a number (for example: 'iphone') + 'tbr': int(bitrate) if bitrate.isdigit() else None, 'height': height, 'width': width }) From 74aa18f68ffe5a721fae1149193c8b6401076d63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 17 Jul 2014 10:07:51 +0200 Subject: [PATCH 253/340] [dfb] Add extractor (closes #3280) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/dfb.py | 44 ++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 youtube_dl/extractor/dfb.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3e3b5d44a..03d2e446d 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -63,6 +63,7 @@ from .dailymotion import ( DailymotionUserIE, ) from .daum import DaumIE +from .dfb import DFBIE from .dotsub import DotsubIE from .dreisat import DreiSatIE from .drtv import DRTVIE diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py new file mode 100644 index 000000000..cb8e06822 --- /dev/null +++ b/youtube_dl/extractor/dfb.py @@ -0,0 +1,44 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class DFBIE(InfoExtractor): + IE_NAME = 'tv.dfb.de' + _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)' + + _TEST = { + 'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/', + # The md5 is different each time + 'info_dict': { + 'id': '9070', + 'ext': 'flv', + 'title': 'Highlights des Empfangs in Berlin', + 'upload_date': '20140716', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + player_info = self._download_xml( + 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id, + video_id) + video_info = player_info.find('video') + + f4m_info = self._download_xml(video_info.find('url').text, video_id) + token_el = f4m_info.find('token') + manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' + + return { + 'id': video_id, + 'title': video_info.find('title').text, + 'url': manifest_url, + 'ext': 'flv', + 'thumbnail': self._og_search_thumbnail(webpage), + 'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]), + } From 530ed178b73b02087362d0b29fb158b28a37d657 Mon Sep 17 00:00:00 2001 From: MikeCol <MikeCol@gmx.net> Date: Thu, 17 Jul 2014 11:17:27 +0200 Subject: [PATCH 254/340] Redtube changed player config, new place to get thumb URL --- youtube_dl/extractor/redtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py index 4295cf93a..d1e12dd8d 100644 --- a/youtube_dl/extractor/redtube.py +++ b/youtube_dl/extractor/redtube.py @@ -35,9 +35,7 @@ class RedTubeIE(InfoExtractor): r'<h1 class="videoTitle[^"]*">(.+?)</h1>', webpage, u'title') - video_thumbnail = self._html_search_regex( - r'playerInnerHTML.+?<img\s+src="(.+?)"', - webpage, u'thumbnail', fatal=False) + video_thumbnail = self._og_search_thumbnail(webpage) # No self-labeling, but they describe themselves as # "Home of Videos Porno" From cf01013161621d2e8d5ff107588617a1f09db53d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 17 Jul 2014 16:28:30 +0200 Subject: [PATCH 255/340] [youtube] Find more swf players (Closes #3270, refer #3271) --- youtube_dl/extractor/youtube.py | 48 +++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6123e1256..5449df8e0 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -347,8 +347,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): self.to_screen(u'RTMP download detected') def _extract_signature_function(self, video_id, player_url, slen): - id_m = re.match(r'.*-(?P<id>[a-zA-Z0-9_-]+)\.(?P<ext>[a-z]+)$', - player_url) + id_m = re.match( + r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P<ext>[a-z]+)$', + player_url) player_type = id_m.group('ext') player_id = id_m.group('id') @@ -1220,30 +1221,37 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url += '&signature=' + url_data['sig'][0] elif 's' in url_data: encrypted_sig = url_data['s'][0] - if self._downloader.params.get('verbose'): - if age_gate: - if player_url is None: - player_version = 'unknown' - else: - player_version = self._search_regex( - r'-(.+)\.swf$', player_url, - u'flash player', fatal=False) - player_desc = 'flash player %s' % player_version - else: - player_version = self._search_regex( - r'html5player-(.+?)\.js', video_webpage, - 'html5 player', fatal=False) - player_desc = u'html5 player %s' % player_version - - parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.')) - self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % - (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) if not age_gate: jsplayer_url_json = self._search_regex( r'"assets":.+?"js":\s*("[^"]+")', video_webpage, u'JS player URL') player_url = json.loads(jsplayer_url_json) + if player_url is None: + player_url_json = self._search_regex( + r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', + video_webpage, u'age gate player URL') + player_url = json.loads(player_url_json) + + if self._downloader.params.get('verbose'): + if player_url is None: + player_version = 'unknown' + player_desc = 'unknown' + else: + if player_url.endswith('swf'): + player_version = self._search_regex( + r'-(.+)\.swf$', player_url, + u'flash player', fatal=False) + player_desc = 'flash player %s' % player_version + else: + player_version = self._search_regex( + r'html5player-(.+?)\.js', video_webpage, + 'html5 player', fatal=False) + player_desc = u'html5 player %s' % player_version + + parts_sizes = u'.'.join(compat_str(len(part)) for part in encrypted_sig.split('.')) + self.to_screen(u'encrypted signature length %d (%s), itag %s, %s' % + (len(encrypted_sig), parts_sizes, url_data['itag'][0], player_desc)) signature = self._decrypt_signature( encrypted_sig, video_id, player_url, age_gate) From 6e74521d98e4824e56d6e4db902de07a75fa867f Mon Sep 17 00:00:00 2001 From: Reventl0v <grunblattr@gmail.com> Date: Thu, 17 Jul 2014 21:08:43 +0200 Subject: [PATCH 256/340] Fix the url in the INSTALLATION section --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bc5e0f76d..fb2f776c9 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ To install it right away for all UNIX users (Linux, OS X, etc.), type: If you do not have curl, you can alternatively use a recent wget: - sudo wget https://yt-dl.org/downloads/2014.05.13/youtube-dl -O /usr/local/bin/youtube-dl + sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo chmod a+x /usr/local/bin/youtube-dl Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). From 66149e3f2b57d110045862bfbc19b3efbb50d152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Thu, 17 Jul 2014 22:29:03 +0200 Subject: [PATCH 257/340] [npo] Fix the json extraction (fixes #3282) The comment in the javascript file is not always the same. --- youtube_dl/extractor/npo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index fbcbe1f40..12e85a716 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -32,7 +32,7 @@ class NPOIE(InfoExtractor): 'http://e.omroep.nl/metadata/aflevering/%s' % video_id, video_id, # We have to remove the javascript callback - transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j) + transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//.*$', r'\1', j) ) token_page = self._download_webpage( 'http://ida.omroep.nl/npoplayer/i.js', From 3fbd27f73ef1f3507443d0e0907a4c0b19a63ed8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Thu, 17 Jul 2014 23:22:49 +0200 Subject: [PATCH 258/340] [youtube] SWF parser: Add opcode 86 Yes, I know we need 96, but an implementation of 86 could help avoid a similar issue. --- youtube_dl/extractor/youtube.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5449df8e0..c2c4fd7e8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -799,6 +799,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): raise NotImplementedError( u'Unsupported (void) property %r on %r' % (mname, obj)) + elif opcode == 86: # newarray + arg_count = u30(coder) + arr = [] + for i in range(arg_count): + arr.append(stack.pop()) + arr = arr[::-1] + stack.append(arr) elif opcode == 93: # findpropstrict index = u30(coder) mname = multinames[index] From 5dc3552d85ac2b3723d0548bbe44996d50891cf2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Jul 2014 00:54:17 +0200 Subject: [PATCH 259/340] [youtube] Add support for classes in swf parser --- youtube_dl/extractor/youtube.py | 99 ++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c2c4fd7e8..16f4a047d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -507,6 +507,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): v = - ((v ^ 0xffffffff) + 1) return v + def s24(reader): + bs = reader.read(3) + assert len(bs) == 3 + first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00' + return struct.unpack('!i', first_byte + bs) + def read_string(reader=None): if reader is None: reader = code_reader @@ -647,16 +653,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return methods + class AVMClass(object): + def __init__(self, name_idx): + self.name_idx = name_idx + self.method_names = {} + self.method_idxs = {} + self.methods = {} + self.method_pyfunctions = {} + self.variables = {} + + @property + def name(self): + return multinames[self.name_idx] + # Classes - TARGET_CLASSNAME = u'SignatureDecipher' - searched_idx = multinames.index(TARGET_CLASSNAME) - searched_class_id = None class_count = u30() + classes = [] for class_id in range(class_count): name_idx = u30() - if name_idx == searched_idx: - # We found the class we're looking for! - searched_class_id = class_id + classes.append(AVMClass(name_idx)) u30() # super_name idx flags = read_byte() if flags & 0x08 != 0: # Protected namespace is present @@ -668,23 +683,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): trait_count = u30() for _c2 in range(trait_count): parse_traits_info() + assert len(classes) == class_count - if searched_class_id is None: + TARGET_CLASSNAME = u'SignatureDecipher' + searched_class = next( + c for c in classes if c.name == TARGET_CLASSNAME) + if searched_class is None: raise ExtractorError(u'Target class %r not found' % TARGET_CLASSNAME) - method_names = {} - method_idxs = {} - for class_id in range(class_count): + for avm_class in classes: u30() # cinit trait_count = u30() for _c2 in range(trait_count): trait_methods = parse_traits_info() - if class_id == searched_class_id: - method_names.update(trait_methods.items()) - method_idxs.update(dict( - (idx, name) - for name, idx in trait_methods.items())) + avm_class.method_names.update(trait_methods.items()) + avm_class.method_idxs.update(dict( + (idx, name) + for name, idx in trait_methods.items())) # Scripts script_count = u30() @@ -697,7 +713,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Method bodies method_body_count = u30() Method = collections.namedtuple('Method', ['code', 'local_count']) - methods = {} for _c in range(method_body_count): method_idx = u30() u30() # max_stack @@ -706,9 +721,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): u30() # max_scope_depth code_length = u30() code = read_bytes(code_length) - if method_idx in method_idxs: - m = Method(code, local_count) - methods[method_idxs[method_idx]] = m + for avm_class in classes: + if method_idx in avm_class.method_idxs: + m = Method(code, local_count) + avm_class.methods[avm_class.method_idxs[method_idx]] = m exception_count = u30() for _c2 in range(exception_count): u30() # from @@ -721,16 +737,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): parse_traits_info() assert p + code_reader.tell() == len(code_tag) - assert len(methods) == len(method_idxs) - method_pyfunctions = {} - - def extract_function(func_name): - if func_name in method_pyfunctions: - return method_pyfunctions[func_name] - if func_name not in methods: + def extract_function(avm_class, func_name): + if func_name in avm_class.method_pyfunctions: + return avm_class.method_pyfunctions[func_name] + if func_name not in avm_class.methods: raise ExtractorError(u'Cannot find function %r' % func_name) - m = methods[func_name] + m = avm_class.methods[func_name] def resfunc(args): registers = ['(this)'] + list(args) + [None] * m.local_count @@ -738,7 +751,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): coder = io.BytesIO(m.code) while True: opcode = struct.unpack('!B', coder.read(1))[0] - if opcode == 36: # pushbyte + if opcode == 17: # iftrue + offset = s24(coder) + value = stack.pop() + if value: + coder.seek(coder.tell() + offset) + elif opcode == 36: # pushbyte v = struct.unpack('!B', coder.read(1))[0] stack.append(v) elif opcode == 44: # pushstring @@ -776,8 +794,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): assert isinstance(obj, list) res = args[0].join(obj) stack.append(res) - elif mname in method_pyfunctions: - stack.append(method_pyfunctions[mname](args)) + elif mname in avm_class.method_pyfunctions: + stack.append(avm_class.method_pyfunctions[mname](args)) else: raise NotImplementedError( u'Unsupported property %r on %r' @@ -809,7 +827,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): elif opcode == 93: # findpropstrict index = u30(coder) mname = multinames[index] - res = extract_function(mname) + res = extract_function(avm_class, mname) + stack.append(res) + elif opcode == 94: # findproperty + index = u30(coder) + mname = multinames[index] + res = avm_class.variables.get(mname) + stack.append(res) + elif opcode == 96: # getlex + index = u30(coder) + mname = multinames[index] + res = avm_class.variables.get(mname) stack.append(res) elif opcode == 97: # setproperty index = u30(coder) @@ -848,6 +876,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): value1 = stack.pop() res = value1 % value2 stack.append(res) + elif opcode == 175: # greaterequals + value2 = stack.pop() + value1 = stack.pop() + result = value1 >= value2 + stack.append(result) elif opcode == 208: # getlocal_0 stack.append(registers[0]) elif opcode == 209: # getlocal_1 @@ -864,10 +897,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): raise NotImplementedError( u'Unsupported opcode %d' % opcode) - method_pyfunctions[func_name] = resfunc + avm_class.method_pyfunctions[func_name] = resfunc return resfunc - initial_function = extract_function(u'decipher') + initial_function = extract_function(searched_class, u'decipher') return lambda s: initial_function([s]) def _decrypt_signature(self, s, video_id, player_url, age_gate=False): From 5425626790a46f9b5bdecf4e33bb254c4c2423ea Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Jul 2014 10:24:28 +0200 Subject: [PATCH 260/340] [youtube] Move swfinterp into its own file --- test/test_youtube_signature.py | 12 +- youtube_dl/extractor/youtube.py | 454 +--------------------------- youtube_dl/swfinterp.py | 503 ++++++++++++++++++++++++++++++++ 3 files changed, 516 insertions(+), 453 deletions(-) create mode 100644 youtube_dl/swfinterp.py diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index d95533959..e443e0be8 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -45,6 +45,12 @@ _TESTS = [ u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', ), + ( + u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', + u'swf', + 86, + u'23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?#$%&\'()*+,-./:;<=>"' + ), ] @@ -57,12 +63,12 @@ class TestSignature(unittest.TestCase): def make_tfunc(url, stype, sig_input, expected_sig): - basename = url.rpartition('/')[2] - m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) - assert m, '%r should follow URL format' % basename + m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3)?\.[a-z]+$', url) + assert m, '%r should follow URL format' % url test_id = m.group(1) def test_func(self): + basename = 'player-%s.%s' % (test_id, stype) fn = os.path.join(self.TESTDATA_DIR, basename) if not os.path.exists(fn): diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 16f4a047d..623056bd9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -14,6 +14,7 @@ import zlib from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor from ..jsinterp import JSInterpreter +from ..swfinterp import SWFInterpreter from ..utils import ( compat_chr, compat_parse_qs, @@ -450,457 +451,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): return lambda s: initial_function([s]) def _parse_sig_swf(self, file_contents): - if file_contents[1:3] != b'WS': - raise ExtractorError( - u'Not an SWF file; header is %r' % file_contents[:3]) - if file_contents[:1] == b'C': - content = zlib.decompress(file_contents[8:]) - else: - raise NotImplementedError(u'Unsupported compression format %r' % - file_contents[:1]) - - def extract_tags(content): - pos = 0 - while pos < len(content): - header16 = struct.unpack('<H', content[pos:pos+2])[0] - pos += 2 - tag_code = header16 >> 6 - tag_len = header16 & 0x3f - if tag_len == 0x3f: - tag_len = struct.unpack('<I', content[pos:pos+4])[0] - pos += 4 - assert pos+tag_len <= len(content) - yield (tag_code, content[pos:pos+tag_len]) - pos += tag_len - - code_tag = next(tag - for tag_code, tag in extract_tags(content) - if tag_code == 82) - p = code_tag.index(b'\0', 4) + 1 - code_reader = io.BytesIO(code_tag[p:]) - - # Parse ABC (AVM2 ByteCode) - def read_int(reader=None): - if reader is None: - reader = code_reader - res = 0 - shift = 0 - for _ in range(5): - buf = reader.read(1) - assert len(buf) == 1 - b = struct.unpack('<B', buf)[0] - res = res | ((b & 0x7f) << shift) - if b & 0x80 == 0: - break - shift += 7 - return res - - def u30(reader=None): - res = read_int(reader) - assert res & 0xf0000000 == 0 - return res - u32 = read_int - - def s32(reader=None): - v = read_int(reader) - if v & 0x80000000 != 0: - v = - ((v ^ 0xffffffff) + 1) - return v - - def s24(reader): - bs = reader.read(3) - assert len(bs) == 3 - first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00' - return struct.unpack('!i', first_byte + bs) - - def read_string(reader=None): - if reader is None: - reader = code_reader - slen = u30(reader) - resb = reader.read(slen) - assert len(resb) == slen - return resb.decode('utf-8') - - def read_bytes(count, reader=None): - if reader is None: - reader = code_reader - resb = reader.read(count) - assert len(resb) == count - return resb - - def read_byte(reader=None): - resb = read_bytes(1, reader=reader) - res = struct.unpack('<B', resb)[0] - return res - - # minor_version + major_version - read_bytes(2 + 2) - - # Constant pool - int_count = u30() - for _c in range(1, int_count): - s32() - uint_count = u30() - for _c in range(1, uint_count): - u32() - double_count = u30() - read_bytes((double_count-1) * 8) - string_count = u30() - constant_strings = [u''] - for _c in range(1, string_count): - s = read_string() - constant_strings.append(s) - namespace_count = u30() - for _c in range(1, namespace_count): - read_bytes(1) # kind - u30() # name - ns_set_count = u30() - for _c in range(1, ns_set_count): - count = u30() - for _c2 in range(count): - u30() - multiname_count = u30() - MULTINAME_SIZES = { - 0x07: 2, # QName - 0x0d: 2, # QNameA - 0x0f: 1, # RTQName - 0x10: 1, # RTQNameA - 0x11: 0, # RTQNameL - 0x12: 0, # RTQNameLA - 0x09: 2, # Multiname - 0x0e: 2, # MultinameA - 0x1b: 1, # MultinameL - 0x1c: 1, # MultinameLA - } - multinames = [u''] - for _c in range(1, multiname_count): - kind = u30() - assert kind in MULTINAME_SIZES, u'Invalid multiname kind %r' % kind - if kind == 0x07: - u30() # namespace_idx - name_idx = u30() - multinames.append(constant_strings[name_idx]) - else: - multinames.append('[MULTINAME kind: %d]' % kind) - for _c2 in range(MULTINAME_SIZES[kind]): - u30() - - # Methods - method_count = u30() - MethodInfo = collections.namedtuple( - 'MethodInfo', - ['NEED_ARGUMENTS', 'NEED_REST']) - method_infos = [] - for method_id in range(method_count): - param_count = u30() - u30() # return type - for _ in range(param_count): - u30() # param type - u30() # name index (always 0 for youtube) - flags = read_byte() - if flags & 0x08 != 0: - # Options present - option_count = u30() - for c in range(option_count): - u30() # val - read_bytes(1) # kind - if flags & 0x80 != 0: - # Param names present - for _ in range(param_count): - u30() # param name - mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) - method_infos.append(mi) - - # Metadata - metadata_count = u30() - for _c in range(metadata_count): - u30() # name - item_count = u30() - for _c2 in range(item_count): - u30() # key - u30() # value - - def parse_traits_info(): - trait_name_idx = u30() - kind_full = read_byte() - kind = kind_full & 0x0f - attrs = kind_full >> 4 - methods = {} - if kind in [0x00, 0x06]: # Slot or Const - u30() # Slot id - u30() # type_name_idx - vindex = u30() - if vindex != 0: - read_byte() # vkind - elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter - u30() # disp_id - method_idx = u30() - methods[multinames[trait_name_idx]] = method_idx - elif kind == 0x04: # Class - u30() # slot_id - u30() # classi - elif kind == 0x05: # Function - u30() # slot_id - function_idx = u30() - methods[function_idx] = multinames[trait_name_idx] - else: - raise ExtractorError(u'Unsupported trait kind %d' % kind) - - if attrs & 0x4 != 0: # Metadata present - metadata_count = u30() - for _c3 in range(metadata_count): - u30() # metadata index - - return methods - - class AVMClass(object): - def __init__(self, name_idx): - self.name_idx = name_idx - self.method_names = {} - self.method_idxs = {} - self.methods = {} - self.method_pyfunctions = {} - self.variables = {} - - @property - def name(self): - return multinames[self.name_idx] - - # Classes - class_count = u30() - classes = [] - for class_id in range(class_count): - name_idx = u30() - classes.append(AVMClass(name_idx)) - u30() # super_name idx - flags = read_byte() - if flags & 0x08 != 0: # Protected namespace is present - u30() # protected_ns_idx - intrf_count = u30() - for _c2 in range(intrf_count): - u30() - u30() # iinit - trait_count = u30() - for _c2 in range(trait_count): - parse_traits_info() - assert len(classes) == class_count - + swfi = SWFInterpreter(file_contents) TARGET_CLASSNAME = u'SignatureDecipher' - searched_class = next( - c for c in classes if c.name == TARGET_CLASSNAME) - if searched_class is None: - raise ExtractorError(u'Target class %r not found' % - TARGET_CLASSNAME) - - for avm_class in classes: - u30() # cinit - trait_count = u30() - for _c2 in range(trait_count): - trait_methods = parse_traits_info() - avm_class.method_names.update(trait_methods.items()) - avm_class.method_idxs.update(dict( - (idx, name) - for name, idx in trait_methods.items())) - - # Scripts - script_count = u30() - for _c in range(script_count): - u30() # init - trait_count = u30() - for _c2 in range(trait_count): - parse_traits_info() - - # Method bodies - method_body_count = u30() - Method = collections.namedtuple('Method', ['code', 'local_count']) - for _c in range(method_body_count): - method_idx = u30() - u30() # max_stack - local_count = u30() - u30() # init_scope_depth - u30() # max_scope_depth - code_length = u30() - code = read_bytes(code_length) - for avm_class in classes: - if method_idx in avm_class.method_idxs: - m = Method(code, local_count) - avm_class.methods[avm_class.method_idxs[method_idx]] = m - exception_count = u30() - for _c2 in range(exception_count): - u30() # from - u30() # to - u30() # target - u30() # exc_type - u30() # var_name - trait_count = u30() - for _c2 in range(trait_count): - parse_traits_info() - - assert p + code_reader.tell() == len(code_tag) - - def extract_function(avm_class, func_name): - if func_name in avm_class.method_pyfunctions: - return avm_class.method_pyfunctions[func_name] - if func_name not in avm_class.methods: - raise ExtractorError(u'Cannot find function %r' % func_name) - m = avm_class.methods[func_name] - - def resfunc(args): - registers = ['(this)'] + list(args) + [None] * m.local_count - stack = [] - coder = io.BytesIO(m.code) - while True: - opcode = struct.unpack('!B', coder.read(1))[0] - if opcode == 17: # iftrue - offset = s24(coder) - value = stack.pop() - if value: - coder.seek(coder.tell() + offset) - elif opcode == 36: # pushbyte - v = struct.unpack('!B', coder.read(1))[0] - stack.append(v) - elif opcode == 44: # pushstring - idx = u30(coder) - stack.append(constant_strings[idx]) - elif opcode == 48: # pushscope - # We don't implement the scope register, so we'll just - # ignore the popped value - stack.pop() - elif opcode == 70: # callproperty - index = u30(coder) - mname = multinames[index] - arg_count = u30(coder) - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - if mname == u'split': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, compat_str) - if args[0] == u'': - res = list(obj) - else: - res = obj.split(args[0]) - stack.append(res) - elif mname == u'slice': - assert len(args) == 1 - assert isinstance(args[0], int) - assert isinstance(obj, list) - res = obj[args[0]:] - stack.append(res) - elif mname == u'join': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, list) - res = args[0].join(obj) - stack.append(res) - elif mname in avm_class.method_pyfunctions: - stack.append(avm_class.method_pyfunctions[mname](args)) - else: - raise NotImplementedError( - u'Unsupported property %r on %r' - % (mname, obj)) - elif opcode == 72: # returnvalue - res = stack.pop() - return res - elif opcode == 79: # callpropvoid - index = u30(coder) - mname = multinames[index] - arg_count = u30(coder) - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - if mname == u'reverse': - assert isinstance(obj, list) - obj.reverse() - else: - raise NotImplementedError( - u'Unsupported (void) property %r on %r' - % (mname, obj)) - elif opcode == 86: # newarray - arg_count = u30(coder) - arr = [] - for i in range(arg_count): - arr.append(stack.pop()) - arr = arr[::-1] - stack.append(arr) - elif opcode == 93: # findpropstrict - index = u30(coder) - mname = multinames[index] - res = extract_function(avm_class, mname) - stack.append(res) - elif opcode == 94: # findproperty - index = u30(coder) - mname = multinames[index] - res = avm_class.variables.get(mname) - stack.append(res) - elif opcode == 96: # getlex - index = u30(coder) - mname = multinames[index] - res = avm_class.variables.get(mname) - stack.append(res) - elif opcode == 97: # setproperty - index = u30(coder) - value = stack.pop() - idx = stack.pop() - obj = stack.pop() - assert isinstance(obj, list) - assert isinstance(idx, int) - obj[idx] = value - elif opcode == 98: # getlocal - index = u30(coder) - stack.append(registers[index]) - elif opcode == 99: # setlocal - index = u30(coder) - value = stack.pop() - registers[index] = value - elif opcode == 102: # getproperty - index = u30(coder) - pname = multinames[index] - if pname == u'length': - obj = stack.pop() - assert isinstance(obj, list) - stack.append(len(obj)) - else: # Assume attribute access - idx = stack.pop() - assert isinstance(idx, int) - obj = stack.pop() - assert isinstance(obj, list) - stack.append(obj[idx]) - elif opcode == 128: # coerce - u30(coder) - elif opcode == 133: # coerce_s - assert isinstance(stack[-1], (type(None), compat_str)) - elif opcode == 164: # modulo - value2 = stack.pop() - value1 = stack.pop() - res = value1 % value2 - stack.append(res) - elif opcode == 175: # greaterequals - value2 = stack.pop() - value1 = stack.pop() - result = value1 >= value2 - stack.append(result) - elif opcode == 208: # getlocal_0 - stack.append(registers[0]) - elif opcode == 209: # getlocal_1 - stack.append(registers[1]) - elif opcode == 210: # getlocal_2 - stack.append(registers[2]) - elif opcode == 211: # getlocal_3 - stack.append(registers[3]) - elif opcode == 214: # setlocal_2 - registers[2] = stack.pop() - elif opcode == 215: # setlocal_3 - registers[3] = stack.pop() - else: - raise NotImplementedError( - u'Unsupported opcode %d' % opcode) - - avm_class.method_pyfunctions[func_name] = resfunc - return resfunc - - initial_function = extract_function(searched_class, u'decipher') + searched_class = swfi.extract_class(TARGET_CLASSNAME) + initial_function = swfi.extract_function(searched_class, u'decipher') return lambda s: initial_function([s]) def _decrypt_signature(self, s, video_id, player_url, age_gate=False): diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py new file mode 100644 index 000000000..1cd292138 --- /dev/null +++ b/youtube_dl/swfinterp.py @@ -0,0 +1,503 @@ +from __future__ import unicode_literals + +import collections +import io +import struct +import zlib + +from .utils import ExtractorError + + +def _extract_tags(content): + pos = 0 + while pos < len(content): + header16 = struct.unpack('<H', content[pos:pos + 2])[0] + pos += 2 + tag_code = header16 >> 6 + tag_len = header16 & 0x3f + if tag_len == 0x3f: + tag_len = struct.unpack('<I', content[pos:pos + 4])[0] + pos += 4 + assert pos + tag_len <= len(content) + yield (tag_code, content[pos:pos + tag_len]) + pos += tag_len + + +class _AVMClass_Object(object): + def __init__(self, avm_class): + self.avm_class = avm_class + + def __repr__(self): + return '%s#%x' % (self.avm_class.name, id(self)) + + +class _AVMClass(object): + def __init__(self, name_idx, name): + self.name_idx = name_idx + self.name = name + self.method_names = {} + self.method_idxs = {} + self.methods = {} + self.method_pyfunctions = {} + self.variables = {} + + def make_object(self): + return _AVMClass_Object(self) + + +def _read_int(reader): + res = 0 + shift = 0 + for _ in range(5): + buf = reader.read(1) + assert len(buf) == 1 + b = struct.unpack('<B', buf)[0] + res = res | ((b & 0x7f) << shift) + if b & 0x80 == 0: + break + shift += 7 + return res + + +def _u30(reader): + res = _read_int(reader) + assert res & 0xf0000000 == 0 + return res +u32 = _read_int + + +def _s32(reader): + v = _read_int(reader) + if v & 0x80000000 != 0: + v = - ((v ^ 0xffffffff) + 1) + return v + + +def _s24(reader): + bs = reader.read(3) + assert len(bs) == 3 + first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00' + return struct.unpack('!i', first_byte + bs) + + +def _read_string(reader): + slen = _u30(reader) + resb = reader.read(slen) + assert len(resb) == slen + return resb.decode('utf-8') + + +def _read_bytes(count, reader): + if reader is None: + reader = code_reader + resb = reader.read(count) + assert len(resb) == count + return resb + + +def _read_byte(reader): + resb = _read_bytes(1, reader=reader) + res = struct.unpack('<B', resb)[0] + return res + + +class SWFInterpreter(object): + def __init__(self, file_contents): + if file_contents[1:3] != b'WS': + raise ExtractorError( + 'Not an SWF file; header is %r' % file_contents[:3]) + if file_contents[:1] == b'C': + content = zlib.decompress(file_contents[8:]) + else: + raise NotImplementedError( + 'Unsupported compression format %r' % + file_contents[:1]) + + code_tag = next(tag + for tag_code, tag in _extract_tags(content) + if tag_code == 82) + p = code_tag.index(b'\0', 4) + 1 + code_reader = io.BytesIO(code_tag[p:]) + + # Parse ABC (AVM2 ByteCode) + + # Define a couple convenience methods + u30 = lambda *args: _u30(*args, reader=code_reader) + s32 = lambda *args: _s32(*args, reader=code_reader) + u32 = lambda *args: _u32(*args, reader=code_reader) + read_bytes = lambda *args: _read_bytes(*args, reader=code_reader) + read_byte = lambda *args: _read_byte(*args, reader=code_reader) + + # minor_version + major_version + read_bytes(2 + 2) + + # Constant pool + int_count = u30() + for _c in range(1, int_count): + s32() + uint_count = u30() + for _c in range(1, uint_count): + u32() + double_count = u30() + read_bytes((double_count - 1) * 8) + string_count = u30() + constant_strings = [''] + for _c in range(1, string_count): + s = _read_string(code_reader) + constant_strings.append(s) + namespace_count = u30() + for _c in range(1, namespace_count): + read_bytes(1) # kind + u30() # name + ns_set_count = u30() + for _c in range(1, ns_set_count): + count = u30() + for _c2 in range(count): + u30() + multiname_count = u30() + MULTINAME_SIZES = { + 0x07: 2, # QName + 0x0d: 2, # QNameA + 0x0f: 1, # RTQName + 0x10: 1, # RTQNameA + 0x11: 0, # RTQNameL + 0x12: 0, # RTQNameLA + 0x09: 2, # Multiname + 0x0e: 2, # MultinameA + 0x1b: 1, # MultinameL + 0x1c: 1, # MultinameLA + } + self.multinames = [''] + for _c in range(1, multiname_count): + kind = u30() + assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind + if kind == 0x07: + u30() # namespace_idx + name_idx = u30() + self.multinames.append(constant_strings[name_idx]) + else: + self.multinames.append('[MULTINAME kind: %d]' % kind) + for _c2 in range(MULTINAME_SIZES[kind]): + u30() + + # Methods + method_count = u30() + MethodInfo = collections.namedtuple( + 'MethodInfo', + ['NEED_ARGUMENTS', 'NEED_REST']) + method_infos = [] + for method_id in range(method_count): + param_count = u30() + u30() # return type + for _ in range(param_count): + u30() # param type + u30() # name index (always 0 for youtube) + flags = read_byte() + if flags & 0x08 != 0: + # Options present + option_count = u30() + for c in range(option_count): + u30() # val + read_bytes(1) # kind + if flags & 0x80 != 0: + # Param names present + for _ in range(param_count): + u30() # param name + mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) + method_infos.append(mi) + + # Metadata + metadata_count = u30() + for _c in range(metadata_count): + u30() # name + item_count = u30() + for _c2 in range(item_count): + u30() # key + u30() # value + + def parse_traits_info(): + trait_name_idx = u30() + kind_full = read_byte() + kind = kind_full & 0x0f + attrs = kind_full >> 4 + methods = {} + if kind in [0x00, 0x06]: # Slot or Const + u30() # Slot id + u30() # type_name_idx + vindex = u30() + if vindex != 0: + read_byte() # vkind + elif kind in [0x01, 0x02, 0x03]: # Method / Getter / Setter + u30() # disp_id + method_idx = u30() + methods[self.multinames[trait_name_idx]] = method_idx + elif kind == 0x04: # Class + u30() # slot_id + u30() # classi + elif kind == 0x05: # Function + u30() # slot_id + function_idx = u30() + methods[function_idx] = self.multinames[trait_name_idx] + else: + raise ExtractorError('Unsupported trait kind %d' % kind) + + if attrs & 0x4 != 0: # Metadata present + metadata_count = u30() + for _c3 in range(metadata_count): + u30() # metadata index + + return methods + + # Classes + class_count = u30() + classes = [] + for class_id in range(class_count): + name_idx = u30() + classes.append(_AVMClass(name_idx, self.multinames[name_idx])) + u30() # super_name idx + flags = read_byte() + if flags & 0x08 != 0: # Protected namespace is present + u30() # protected_ns_idx + intrf_count = u30() + for _c2 in range(intrf_count): + u30() + u30() # iinit + trait_count = u30() + for _c2 in range(trait_count): + parse_traits_info() + assert len(classes) == class_count + self._classes_by_name = dict((c.name, c) for c in classes) + + for avm_class in classes: + u30() # cinit + trait_count = u30() + for _c2 in range(trait_count): + trait_methods = parse_traits_info() + avm_class.method_names.update(trait_methods.items()) + avm_class.method_idxs.update(dict( + (idx, name) + for name, idx in trait_methods.items())) + + # Scripts + script_count = u30() + for _c in range(script_count): + u30() # init + trait_count = u30() + for _c2 in range(trait_count): + parse_traits_info() + + # Method bodies + method_body_count = u30() + Method = collections.namedtuple('Method', ['code', 'local_count']) + for _c in range(method_body_count): + method_idx = u30() + u30() # max_stack + local_count = u30() + u30() # init_scope_depth + u30() # max_scope_depth + code_length = u30() + code = read_bytes(code_length) + for avm_class in classes: + if method_idx in avm_class.method_idxs: + m = Method(code, local_count) + avm_class.methods[avm_class.method_idxs[method_idx]] = m + exception_count = u30() + for _c2 in range(exception_count): + u30() # from + u30() # to + u30() # target + u30() # exc_type + u30() # var_name + trait_count = u30() + for _c2 in range(trait_count): + parse_traits_info() + + assert p + code_reader.tell() == len(code_tag) + + def extract_class(self, class_name): + try: + return self._classes_by_name[class_name] + except KeyError: + raise ExtractorError('Class %r not found' % class_name) + + def extract_function(self, avm_class, func_name): + if func_name in avm_class.method_pyfunctions: + return avm_class.method_pyfunctions[func_name] + if func_name in self._classes_by_name: + return self._classes_by_name[func_name].make_object() + if func_name not in avm_class.methods: + raise ExtractorError('Cannot find function %r' % func_name) + m = avm_class.methods[func_name] + + def resfunc(args): + # Helper functions + coder = io.BytesIO(m.code) + s24 = lambda: _s24(coder) + u30 = lambda: _u30(coder) + + print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) + registers = ['(this)'] + list(args) + [None] * m.local_count + stack = [] + while True: + opcode = _read_byte(coder) + print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) + if opcode == 17: # iftrue + offset = s24() + value = stack.pop() + if value: + coder.seek(coder.tell() + offset) + elif opcode == 36: # pushbyte + v = _read_byte(coder) + stack.append(v) + elif opcode == 44: # pushstring + idx = u30() + stack.append(constant_strings[idx]) + elif opcode == 48: # pushscope + # We don't implement the scope register, so we'll just + # ignore the popped value + new_scope = stack.pop() + elif opcode == 70: # callproperty + index = u30() + mname = self.multinames[index] + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + if mname == 'split': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + assert isinstance(obj, compat_str) + if args[0] == '': + res = list(obj) + else: + res = obj.split(args[0]) + stack.append(res) + elif mname == 'slice': + assert len(args) == 1 + assert isinstance(args[0], int) + assert isinstance(obj, list) + res = obj[args[0]:] + stack.append(res) + elif mname == 'join': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + assert isinstance(obj, list) + res = args[0].join(obj) + stack.append(res) + elif mname in avm_class.method_pyfunctions: + stack.append(avm_class.method_pyfunctions[mname](args)) + else: + raise NotImplementedError( + 'Unsupported property %r on %r' + % (mname, obj)) + elif opcode == 72: # returnvalue + res = stack.pop() + return res + elif opcode == 74: # constructproperty + index = u30() + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + + mname = self.multinames[index] + construct_method = self.extract_function( + obj.avm_class, mname) + # We do not actually call the constructor for now; + # we just pretend it does nothing + stack.append(obj) + elif opcode == 79: # callpropvoid + index = u30() + mname = self.multinames[index] + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + if mname == 'reverse': + assert isinstance(obj, list) + obj.reverse() + else: + raise NotImplementedError( + 'Unsupported (void) property %r on %r' + % (mname, obj)) + elif opcode == 86: # newarray + arg_count = u30() + arr = [] + for i in range(arg_count): + arr.append(stack.pop()) + arr = arr[::-1] + stack.append(arr) + elif opcode == 93: # findpropstrict + index = u30() + mname = self.multinames[index] + res = self.extract_function(avm_class, mname) + stack.append(res) + elif opcode == 94: # findproperty + index = u30() + mname = self.multinames[index] + res = avm_class.variables.get(mname) + stack.append(res) + elif opcode == 96: # getlex + index = u30() + mname = self.multinames[index] + res = avm_class.variables.get(mname, None) + stack.append(res) + elif opcode == 97: # setproperty + index = u30() + value = stack.pop() + idx = self.multinames[index] + obj = stack.pop() + obj[idx] = value + elif opcode == 98: # getlocal + index = u30() + stack.append(registers[index]) + elif opcode == 99: # setlocal + index = u30() + value = stack.pop() + registers[index] = value + elif opcode == 102: # getproperty + index = u30() + pname = self.multinames[index] + if pname == 'length': + obj = stack.pop() + assert isinstance(obj, list) + stack.append(len(obj)) + else: # Assume attribute access + idx = stack.pop() + assert isinstance(idx, int) + obj = stack.pop() + assert isinstance(obj, list) + stack.append(obj[idx]) + elif opcode == 128: # coerce + u30() + elif opcode == 133: # coerce_s + assert isinstance(stack[-1], (type(None), compat_str)) + elif opcode == 164: # modulo + value2 = stack.pop() + value1 = stack.pop() + res = value1 % value2 + stack.append(res) + elif opcode == 175: # greaterequals + value2 = stack.pop() + value1 = stack.pop() + result = value1 >= value2 + stack.append(result) + elif opcode == 208: # getlocal_0 + stack.append(registers[0]) + elif opcode == 209: # getlocal_1 + stack.append(registers[1]) + elif opcode == 210: # getlocal_2 + stack.append(registers[2]) + elif opcode == 211: # getlocal_3 + stack.append(registers[3]) + elif opcode == 214: # setlocal_2 + registers[2] = stack.pop() + elif opcode == 215: # setlocal_3 + registers[3] = stack.pop() + else: + raise NotImplementedError( + 'Unsupported opcode %d' % opcode) + + avm_class.method_pyfunctions[func_name] = resfunc + return resfunc + From c45a6caa95e9cea09b84417cfd13ff066986c695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 18 Jul 2014 21:37:40 +0700 Subject: [PATCH 261/340] [utils] Add None check in str_to_int --- youtube_dl/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 64a9618ca..919603c62 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1194,6 +1194,8 @@ def format_bytes(bytes): def str_to_int(int_str): + if int_str is None: + return None int_str = re.sub(r'[,\.]', u'', int_str) return int(int_str) From e0942e37aa6f20de88f6d5ed7dc288408500371a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Fri, 18 Jul 2014 21:39:21 +0700 Subject: [PATCH 262/340] [crackled] Improve, fix invalid regexes and extract more metadata --- youtube_dl/extractor/cracked.py | 61 +++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/cracked.py b/youtube_dl/extractor/cracked.py index 37c0f7ffb..74b880ffc 100644 --- a/youtube_dl/extractor/cracked.py +++ b/youtube_dl/extractor/cracked.py @@ -1,23 +1,26 @@ -# coding: utf-8 from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + str_to_int, +) + class CrackedIE(InfoExtractor): - _VALID_URL = r'http?://.*?\.cracked\.com/video_+(?P<id>.*)_.*' + _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html' _TEST = { - 'url': 'http://www.cracked.com/video_18803_4-social-criticisms-hidden-in-sonic-hedgehog-games.html', - + 'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html', + 'md5': '4b29a5eeec292cd5eca6388c7558db9e', 'info_dict': { - 'id': '18803', + 'id': '19006', 'ext': 'mp4', - 'title': "4 Social Criticisms Hidden in 'Sonic the Hedgehog' Games | Cracked.com", - 'height': 375, - 'width': 666, - - + 'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies', + 'description': 'md5:3b909e752661db86007d10e5ec2df769', + 'timestamp': 1405659600, + 'upload_date': '20140718', } } @@ -26,21 +29,37 @@ class CrackedIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - title = self._search_regex(r'<title>(.*?)',webpage,'title') - video_url = self._search_regex(r'var CK_vidSrc = "+(.*)"',webpage,'url') - width = self._search_regex(r'width="(.*?)"',webpage,'width') - height = re.findall(r'height="(.*?)"',webpage)[1] + video_url = self._html_search_regex( + [r'var\s+CK_vidSrc\s*=\s*"([^"]+)"', r'([\d,\.]+) Views', webpage, 'view count', fatal=False)) + comment_count = str_to_int(self._html_search_regex( + r'([\d,\.]+)', webpage, 'comment count', fatal=False)) + + m = re.search(r'_(?P\d+)X(?P\d+)\.mp4$', video_url) + if m: + width = int(m.group('width')) + height = int(m.group('height')) + else: + width = height = None return { - 'url':video_url, 'id': video_id, - 'ext':'mp4', - 'title':title, - 'height':int(height), - 'width':int(width) - - + 'url':video_url, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'view_count': view_count, + 'comment_count': comment_count, + 'height': height, + 'width': width, } \ No newline at end of file From 5e95cb27d683c62ff03bd219b5aaf41fc62289bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Jul 2014 21:41:34 +0700 Subject: [PATCH 263/340] Credit @hassaanaliw for cracked (#3274) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 6e2359b28..f223b75f4 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -65,6 +65,7 @@ __authors__ = ( 'Tobias Bell', 'Naglis Jonaitis', 'Charles Chen', + 'Hassaan Ali', ) __license__ = 'Public Domain' From 0c1ffe980d82c664ed46c31eae7c589ebcd7fc20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 18 Jul 2014 21:43:01 +0700 Subject: [PATCH 264/340] [mlb] Fix _VALID_URL --- youtube_dl/extractor/mlb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 18ab2c135..c28be3a7d 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -11,7 +11,7 @@ from ..utils import ( class MLBIE(InfoExtractor): - _VALID_URL = r'http?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?Pn?\d+)' + _VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?Pn?\d+)' _TESTS = [ { 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', From 23d3c422ab1e38e674ed9f0e6efebb41d5ff63fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 19 Jul 2014 17:47:50 +0700 Subject: [PATCH 265/340] [francetv] Add support for mobile URLs (Closes #3275) --- youtube_dl/extractor/francetv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index f3e0f38b7..1fbe6d175 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -48,7 +48,7 @@ class PluzzIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor): IE_NAME = 'francetvinfo.fr' - _VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P.+)\.html' + _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html' _TESTS = [{ 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', @@ -211,7 +211,7 @@ class GenerationQuoiIE(InfoExtractor): class CultureboxIE(FranceTVBaseInfoExtractor): IE_NAME = 'culturebox.francetvinfo.fr' - _VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' + _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' _TEST = { 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', From 604f292ab7701b9284e50877f68ae7fcadcc34bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 20 Jul 2014 00:00:20 +0700 Subject: [PATCH 266/340] [sapo] Add extractor (Closes #2816) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/sapo.py | 119 +++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 youtube_dl/extractor/sapo.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index f78aa066f..a17a80a5f 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -253,6 +253,7 @@ from .rutube import ( RutubePersonIE, ) from .rutv import RUTVIE +from .sapo import SapoIE from .savefrom import SaveFromIE from .scivee import SciVeeIE from .screencast import ScreencastIE diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py new file mode 100644 index 000000000..172cc1275 --- /dev/null +++ b/youtube_dl/extractor/sapo.py @@ -0,0 +1,119 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + unified_strdate, +) + + +class SapoIE(InfoExtractor): + IE_DESC = 'SAPO Vídeos' + _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})' + + _TESTS = [ + { + 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi', + 'md5': '79ee523f6ecb9233ac25075dee0eda83', + 'note': 'SD video', + 'info_dict': { + 'id': 'UBz95kOtiWYUMTA5Ghfi', + 'ext': 'mp4', + 'title': 'Benfica - Marcas na Hitória', + 'description': 'md5:c9082000a128c3fd57bf0299e1367f22', + 'duration': 264, + 'uploader': 'tiago_1988', + 'upload_date': '20080229', + 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'], + }, + }, + { + 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF', + 'md5': '90a2f283cfb49193fe06e861613a72aa', + 'note': 'HD video', + 'info_dict': { + 'id': 'IyusNAZ791ZdoCY5H5IF', + 'ext': 'mp4', + 'title': 'Codebits VII - Report', + 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8', + 'duration': 144, + 'uploader': 'codebits', + 'upload_date': '20140427', + 'categories': ['codebits', 'codebits2014'], + }, + }, + { + 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz', + 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac', + 'note': 'v2 video', + 'info_dict': { + 'id': 'yLqjzPtbTimsn2wWBKHz', + 'ext': 'mp4', + 'title': 'Hipnose Condicionativa 4', + 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40', + 'duration': 692, + 'uploader': 'sapozen', + 'upload_date': '20090609', + 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'], + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + item = self._download_xml( + 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item') + + title = item.find('./title').text + description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text + thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url') + duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text) + uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text + upload_date = unified_strdate(item.find('./pubDate').text) + view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text) + comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text) + tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text + categories = tags.split() if tags else [] + age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0 + + video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text + video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x') + + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'format_id': 'sd', + 'width': int(video_size[0]), + 'height': int(video_size[1]), + }] + + if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true': + formats.append({ + 'url': re.sub(r'/mov/1$', '/mov/39', video_url), + 'ext': 'mp4', + 'format_id': 'hd', + 'width': 1280, + 'height': 720, + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': uploader, + 'upload_date': upload_date, + 'view_count': view_count, + 'comment_count': comment_count, + 'categories': categories, + 'age_limit': age_limit, + 'formats': formats, + } From 8adec2b9e05d356a6996ea6f85aa9b4bf0665ce2 Mon Sep 17 00:00:00 2001 From: hassaanaliw <hassaanaliw@gmail.com> Date: Sat, 19 Jul 2014 22:49:25 +0500 Subject: [PATCH 267/340] [snotr] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/snotr.py | 73 ++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 youtube_dl/extractor/snotr.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 78b95c2a5..faf473548 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -263,6 +263,7 @@ from .smotri import ( SmotriUserIE, SmotriBroadcastIE, ) +from .snotr import SnotrIE from .sohu import SohuIE from .soundcloud import ( SoundcloudIE, diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py new file mode 100644 index 000000000..f89e81bf3 --- /dev/null +++ b/youtube_dl/extractor/snotr.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + + str_to_int, + parse_iso8601, + + + +) + +class SnotrIE(InfoExtractor): + _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)' + _TESTS =[ { + 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', + 'info_dict': { + 'id': '13708', + 'ext': 'flv', + 'title': 'Drone flying through fireworks!', + 'duration': 247, + 'filesize':12320768 + } + }, + + + + { + + 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', + 'info_dict': { + 'id': '530', + 'ext': 'flv', + 'title': 'David Letteman - George W. Bush Top 10', + 'duration': 126, + 'filesize': 1048576 + } + }] + + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + # TODO more code goes here, for example ... + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + + description = self._og_search_description(webpage) + + video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id + + view_count = str_to_int(self._html_search_regex(r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',webpage,'view count')) + + duration = self._html_search_regex(r'<p>\n<strong>Length:</strong>\n(.*?)</p>',webpage,'duration') + duration = str_to_int(duration[:1])*60 + str_to_int(duration[2:4]) + + file_size = self._html_search_regex(r'<p>\n<strong>Filesize:</strong>\n(.*?)</p>',webpage,'filesize') + file_size = str_to_int(re.match(r'\d+',file_size).group())*131072 + + return { + 'id': video_id, + 'title': title, + 'url':video_url, + 'view_count':view_count, + 'duration':duration, + 'filesize':file_size + + } \ No newline at end of file From 0cb2056304178ae8944e84c5bc72f96102291a12 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Fri, 18 Jul 2014 14:20:34 +0200 Subject: [PATCH 268/340] [swfinterp] Start working on basic tests --- test/swftests/.gitignore | 1 + test/swftests/LocalVars.as | 13 +++++++ test/test_swfinterp.py | 73 ++++++++++++++++++++++++++++++++++++++ youtube_dl/swfinterp.py | 56 ++++++++++++++++++++--------- 4 files changed, 126 insertions(+), 17 deletions(-) create mode 100644 test/swftests/.gitignore create mode 100644 test/swftests/LocalVars.as create mode 100644 test/test_swfinterp.py diff --git a/test/swftests/.gitignore b/test/swftests/.gitignore new file mode 100644 index 000000000..da97ff7ca --- /dev/null +++ b/test/swftests/.gitignore @@ -0,0 +1 @@ +*.swf diff --git a/test/swftests/LocalVars.as b/test/swftests/LocalVars.as new file mode 100644 index 000000000..b2911a9f3 --- /dev/null +++ b/test/swftests/LocalVars.as @@ -0,0 +1,13 @@ +// input: [1, 2] +// output: 3 + +package { +public class LocalVars { + public static function main(a:int, b:int):int{ + var c:int = a + b + b; + var d:int = c - b; + var e:int = d; + return e; + } +} +} diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py new file mode 100644 index 000000000..98a14a006 --- /dev/null +++ b/test/test_swfinterp.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +import io +import json +import re +import subprocess + +from youtube_dl.swfinterp import SWFInterpreter + + +TEST_DIR = os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'swftests') + + +class TestSWFInterpreter(unittest.TestCase): + pass + + +for testfile in os.listdir(TEST_DIR): + m = re.match(r'^(.*)\.(as)$', testfile) + if not m: + continue + test_id = m.group(1) + + def test_func(self): + as_file = os.path.join(TEST_DIR, testfile) + swf_file = os.path.join(TEST_DIR, test_id + '.swf') + if ((not os.path.exists(swf_file)) + or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): + # Recompile + try: + subprocess.check_call(['mxmlc', '--output', swf_file, as_file]) + except OSError as ose: + if ose.errno == errno.ENOENT: + print('mxmlc not found! Skipping test.') + return + raise + + with open(swf_file, 'rb') as swf_f: + swf_content = swf_f.read() + swfi = SWFInterpreter(swf_content) + + with io.open(as_file, 'r', encoding='utf-8') as as_f: + as_content = as_f.read() + + def _find_spec(key): + m = re.search( + r'(?m)^//\s*%s:\s*(.*?)\n' % re.escape(key), as_content) + if not m: + raise ValueError('Cannot find %s in %s' % (key, testfile)) + return json.loads(m.group(1)) + + input_args = _find_spec('input') + output = _find_spec('output') + + swf_class = swfi.extract_class(test_id) + func = swfi.extract_function(swf_class, 'main') + res = func(input_args) + self.assertEqual(res, output) + + test_func.__name__ = str('test_swf_' + test_id) + setattr(TestSWFInterpreter, test_func.__name__, test_func) + + +if __name__ == '__main__': + unittest.main() diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 1cd292138..49fade364 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -8,8 +8,22 @@ import zlib from .utils import ExtractorError -def _extract_tags(content): - pos = 0 +def _extract_tags(file_contents): + if file_contents[1:3] != b'WS': + raise ExtractorError( + 'Not an SWF file; header is %r' % file_contents[:3]) + if file_contents[:1] == b'C': + content = zlib.decompress(file_contents[8:]) + else: + raise NotImplementedError( + 'Unsupported compression format %r' % + file_contents[:1]) + + # Determine number of bits in framesize rectangle + framesize_nbits = struct.unpack('!B', content[:1])[0] >> 3 + framesize_len = (5 + 4 * framesize_nbits + 7) // 8 + + pos = framesize_len + 2 + 2 while pos < len(content): header16 = struct.unpack('<H', content[pos:pos + 2])[0] pos += 2 @@ -18,7 +32,9 @@ def _extract_tags(content): if tag_len == 0x3f: tag_len = struct.unpack('<I', content[pos:pos + 4])[0] pos += 4 - assert pos + tag_len <= len(content) + assert pos + tag_len <= len(content), \ + ('Tag %d ends at %d+%d - that\'s longer than the file (%d)' + % (tag_code, pos, tag_len, len(content))) yield (tag_code, content[pos:pos + tag_len]) pos += tag_len @@ -88,8 +104,7 @@ def _read_string(reader): def _read_bytes(count, reader): - if reader is None: - reader = code_reader + assert count >= 0 resb = reader.read(count) assert len(resb) == count return resb @@ -103,18 +118,8 @@ def _read_byte(reader): class SWFInterpreter(object): def __init__(self, file_contents): - if file_contents[1:3] != b'WS': - raise ExtractorError( - 'Not an SWF file; header is %r' % file_contents[:3]) - if file_contents[:1] == b'C': - content = zlib.decompress(file_contents[8:]) - else: - raise NotImplementedError( - 'Unsupported compression format %r' % - file_contents[:1]) - code_tag = next(tag - for tag_code, tag in _extract_tags(content) + for tag_code, tag in _extract_tags(file_contents) if tag_code == 82) p = code_tag.index(b'\0', 4) + 1 code_reader = io.BytesIO(code_tag[p:]) @@ -139,7 +144,7 @@ class SWFInterpreter(object): for _c in range(1, uint_count): u32() double_count = u30() - read_bytes((double_count - 1) * 8) + read_bytes(max(0, (double_count - 1)) * 8) string_count = u30() constant_strings = [''] for _c in range(1, string_count): @@ -349,6 +354,9 @@ class SWFInterpreter(object): elif opcode == 36: # pushbyte v = _read_byte(coder) stack.append(v) + elif opcode == 42: # dup + value = stack[-1] + stack.append(value) elif opcode == 44: # pushstring idx = u30() stack.append(constant_strings[idx]) @@ -468,10 +476,24 @@ class SWFInterpreter(object): obj = stack.pop() assert isinstance(obj, list) stack.append(obj[idx]) + elif opcode == 115: # convert_ + value = stack.pop() + intvalue = int(value) + stack.append(intvalue) elif opcode == 128: # coerce u30() elif opcode == 133: # coerce_s assert isinstance(stack[-1], (type(None), compat_str)) + elif opcode == 160: # add + value2 = stack.pop() + value1 = stack.pop() + res = value1 + value2 + stack.append(res) + elif opcode == 161: # subtract + value2 = stack.pop() + value1 = stack.pop() + res = value1 - value2 + stack.append(res) elif opcode == 164: # modulo value2 = stack.pop() value1 = stack.pop() From e75c24e88907f329c57cf05d729dbf599349bb50 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 00:03:54 +0200 Subject: [PATCH 269/340] [swfinterp] Extend tests and fix parsing --- test/swftests/StaticAssignment.as | 13 +++++++++ test/swftests/StaticRetrieval.as | 16 +++++++++++ test/test_swfinterp.py | 9 ++++-- youtube_dl/swfinterp.py | 47 +++++++++++++++++++++++-------- 4 files changed, 70 insertions(+), 15 deletions(-) create mode 100644 test/swftests/StaticAssignment.as create mode 100644 test/swftests/StaticRetrieval.as diff --git a/test/swftests/StaticAssignment.as b/test/swftests/StaticAssignment.as new file mode 100644 index 000000000..b061c219d --- /dev/null +++ b/test/swftests/StaticAssignment.as @@ -0,0 +1,13 @@ +// input: [1] +// output: 1 + +package { +public class StaticAssignment { + public static var v:int; + + public static function main(a:int):int{ + v = a; + return v; + } +} +} diff --git a/test/swftests/StaticRetrieval.as b/test/swftests/StaticRetrieval.as new file mode 100644 index 000000000..c8352d819 --- /dev/null +++ b/test/swftests/StaticRetrieval.as @@ -0,0 +1,16 @@ +// input: [] +// output: 1 + +package { +public class StaticRetrieval { + public static var v:int; + + public static function main():int{ + if (v) { + return 0; + } else { + return 1; + } + } +} +} diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py index 98a14a006..3bb5a6308 100644 --- a/test/test_swfinterp.py +++ b/test/test_swfinterp.py @@ -23,10 +23,10 @@ class TestSWFInterpreter(unittest.TestCase): pass -for testfile in os.listdir(TEST_DIR): +def _make_testfunc(testfile): m = re.match(r'^(.*)\.(as)$', testfile) if not m: - continue + return test_id = m.group(1) def test_func(self): @@ -36,7 +36,7 @@ for testfile in os.listdir(TEST_DIR): or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): # Recompile try: - subprocess.check_call(['mxmlc', '--output', swf_file, as_file]) + subprocess.check_call(['mxmlc', '-output', swf_file, as_file]) except OSError as ose: if ose.errno == errno.ENOENT: print('mxmlc not found! Skipping test.') @@ -69,5 +69,8 @@ for testfile in os.listdir(TEST_DIR): setattr(TestSWFInterpreter, test_func.__name__, test_func) +for testfile in os.listdir(TEST_DIR): + _make_testfunc(testfile) + if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 49fade364..64a518fc6 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -39,6 +39,16 @@ def _extract_tags(file_contents): pos += tag_len +class _AVM_Object(object): + def __init__(self, value=None, name_hint=None): + self.value = value + self.name_hint = name_hint + + def __repr__(self): + nh = '' if self.name_hint is None else (' %s' % self.name_hint) + return 'AVMObject%s(%r)' % (nh, self.value) + + class _AVMClass_Object(object): def __init__(self, avm_class): self.avm_class = avm_class @@ -92,8 +102,8 @@ def _s32(reader): def _s24(reader): bs = reader.read(3) assert len(bs) == 3 - first_byte = b'\xff' if (ord(bs[0:1]) >= 0x80) else b'\x00' - return struct.unpack('!i', first_byte + bs) + last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00' + return struct.unpack('<i', bs + last_byte)[0] def _read_string(reader): @@ -341,8 +351,9 @@ class SWFInterpreter(object): u30 = lambda: _u30(coder) print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) - registers = ['(this)'] + list(args) + [None] * m.local_count + registers = [avm_class.variables] + list(args) + [None] * m.local_count stack = [] + scopes = collections.deque([avm_class.variables]) while True: opcode = _read_byte(coder) print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) @@ -351,6 +362,11 @@ class SWFInterpreter(object): value = stack.pop() if value: coder.seek(coder.tell() + offset) + elif opcode == 18: # iffalse + offset = s24() + value = stack.pop() + if not value: + coder.seek(coder.tell() + offset) elif opcode == 36: # pushbyte v = _read_byte(coder) stack.append(v) @@ -361,9 +377,8 @@ class SWFInterpreter(object): idx = u30() stack.append(constant_strings[idx]) elif opcode == 48: # pushscope - # We don't implement the scope register, so we'll just - # ignore the popped value new_scope = stack.pop() + scopes.append(new_scope) elif opcode == 70: # callproperty index = u30() mname = self.multinames[index] @@ -435,20 +450,28 @@ class SWFInterpreter(object): arr.append(stack.pop()) arr = arr[::-1] stack.append(arr) - elif opcode == 93: # findpropstrict - index = u30() - mname = self.multinames[index] - res = self.extract_function(avm_class, mname) - stack.append(res) elif opcode == 94: # findproperty index = u30() mname = self.multinames[index] - res = avm_class.variables.get(mname) + for s in reversed(scopes): + if mname in s: + res = s + break + else: + res = scopes[0] stack.append(res) elif opcode == 96: # getlex index = u30() mname = self.multinames[index] - res = avm_class.variables.get(mname, None) + for s in reversed(scopes): + if mname in s: + scope = s + break + else: + scope = scopes[0] + # I cannot find where static variables are initialized + # so let's just return None + res = scope.get(mname) stack.append(res) elif opcode == 97: # setproperty index = u30() From 70f767dc65189df0118d319d62385e54bd9bb03e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 00:25:58 +0200 Subject: [PATCH 270/340] [swfinterp] Add support for multiple classes --- test/swftests/ClassConstruction.as | 15 ++++++++++ youtube_dl/swfinterp.py | 48 ++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 6 deletions(-) create mode 100644 test/swftests/ClassConstruction.as diff --git a/test/swftests/ClassConstruction.as b/test/swftests/ClassConstruction.as new file mode 100644 index 000000000..436479f8f --- /dev/null +++ b/test/swftests/ClassConstruction.as @@ -0,0 +1,15 @@ +// input: [] +// output: 0 + +package { +public class ClassConstruction { + public static function main():int{ + var f:Foo = new Foo(); + return 0; + } +} +} + +class Foo { + +} diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 64a518fc6..f7a3889a0 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -5,7 +5,10 @@ import io import struct import zlib -from .utils import ExtractorError +from .utils import ( + compat_str, + ExtractorError, +) def _extract_tags(file_contents): @@ -65,7 +68,26 @@ class _AVMClass(object): self.method_idxs = {} self.methods = {} self.method_pyfunctions = {} - self.variables = {} + + class ScopeDict(dict): + def __init__(self, avm_class): + super(ScopeDict, self).__init__() + self.avm_class = avm_class + + def __getitem__(self, k): + print('getting %r' % k) + return super(ScopeDict, self).__getitem__(k) + + def __contains__(self, k): + print('contains %r' % k) + return super(ScopeDict, self).__contains__(k) + + def __repr__(self): + return '%s__Scope(%s)' % ( + self.avm_class.name, + super(ScopeDict, self).__repr__()) + + self.variables = ScopeDict(self) def make_object(self): return _AVMClass_Object(self) @@ -156,10 +178,10 @@ class SWFInterpreter(object): double_count = u30() read_bytes(max(0, (double_count - 1)) * 8) string_count = u30() - constant_strings = [''] + self.constant_strings = [''] for _c in range(1, string_count): s = _read_string(code_reader) - constant_strings.append(s) + self.constant_strings.append(s) namespace_count = u30() for _c in range(1, namespace_count): read_bytes(1) # kind @@ -189,7 +211,7 @@ class SWFInterpreter(object): if kind == 0x07: u30() # namespace_idx name_idx = u30() - self.multinames.append(constant_strings[name_idx]) + self.multinames.append(self.constant_strings[name_idx]) else: self.multinames.append('[MULTINAME kind: %d]' % kind) for _c2 in range(MULTINAME_SIZES[kind]): @@ -375,7 +397,7 @@ class SWFInterpreter(object): stack.append(value) elif opcode == 44: # pushstring idx = u30() - stack.append(constant_strings[idx]) + stack.append(self.constant_strings[idx]) elif opcode == 48: # pushscope new_scope = stack.pop() scopes.append(new_scope) @@ -450,6 +472,16 @@ class SWFInterpreter(object): arr.append(stack.pop()) arr = arr[::-1] stack.append(arr) + elif opcode == 93: # findpropstrict + index = u30() + mname = self.multinames[index] + for s in reversed(scopes): + if mname in s: + res = s + break + else: + res = scopes[0] + stack.append(res) elif opcode == 94: # findproperty index = u30() mname = self.multinames[index] @@ -535,6 +567,10 @@ class SWFInterpreter(object): stack.append(registers[2]) elif opcode == 211: # getlocal_3 stack.append(registers[3]) + elif opcode == 212: # setlocal_0 + registers[0] = stack.pop() + elif opcode == 213: # setlocal_1 + registers[1] = stack.pop() elif opcode == 214: # setlocal_2 registers[2] = stack.pop() elif opcode == 215: # setlocal_3 From 01b4b745749bb92b4a56b4201d699740cbf450ab Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 12:47:15 +0200 Subject: [PATCH 271/340] [swfinterp] Add support for calls to instance methods --- test/swftests/ClassCall.as | 17 ++++++ youtube_dl/swfinterp.py | 117 +++++++++++++++++++------------------ 2 files changed, 77 insertions(+), 57 deletions(-) create mode 100644 test/swftests/ClassCall.as diff --git a/test/swftests/ClassCall.as b/test/swftests/ClassCall.as new file mode 100644 index 000000000..aef58daf3 --- /dev/null +++ b/test/swftests/ClassCall.as @@ -0,0 +1,17 @@ +// input: [] +// output: 121 + +package { +public class ClassCall { + public static function main():int{ + var f:OtherClass = new OtherClass(); + return f.func(100,20); + } +} +} + +class OtherClass { + public function func(x: int, y: int):int { + return x+y+1; + } +} diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index f7a3889a0..8ccb64c9d 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -42,16 +42,6 @@ def _extract_tags(file_contents): pos += tag_len -class _AVM_Object(object): - def __init__(self, value=None, name_hint=None): - self.value = value - self.name_hint = name_hint - - def __repr__(self): - nh = '' if self.name_hint is None else (' %s' % self.name_hint) - return 'AVMObject%s(%r)' % (nh, self.value) - - class _AVMClass_Object(object): def __init__(self, avm_class): self.avm_class = avm_class @@ -74,14 +64,6 @@ class _AVMClass(object): super(ScopeDict, self).__init__() self.avm_class = avm_class - def __getitem__(self, k): - print('getting %r' % k) - return super(ScopeDict, self).__getitem__(k) - - def __contains__(self, k): - print('contains %r' % k) - return super(ScopeDict, self).__contains__(k) - def __repr__(self): return '%s__Scope(%s)' % ( self.avm_class.name, @@ -92,6 +74,15 @@ class _AVMClass(object): def make_object(self): return _AVMClass_Object(self) + def __repr__(self): + return '_AVMClass(%s)' % (self.name) + + def register_methods(self, methods): + self.method_names.update(methods.items()) + self.method_idxs.update(dict( + (idx, name) + for name, idx in methods.items())) + def _read_int(reader): res = 0 @@ -290,7 +281,11 @@ class SWFInterpreter(object): classes = [] for class_id in range(class_count): name_idx = u30() - classes.append(_AVMClass(name_idx, self.multinames[name_idx])) + + cname = self.multinames[name_idx] + avm_class = _AVMClass(name_idx, cname) + classes.append(avm_class) + u30() # super_name idx flags = read_byte() if flags & 0x08 != 0: # Protected namespace is present @@ -301,7 +296,9 @@ class SWFInterpreter(object): u30() # iinit trait_count = u30() for _c2 in range(trait_count): - parse_traits_info() + trait_methods = parse_traits_info() + avm_class.register_methods(trait_methods) + assert len(classes) == class_count self._classes_by_name = dict((c.name, c) for c in classes) @@ -310,10 +307,7 @@ class SWFInterpreter(object): trait_count = u30() for _c2 in range(trait_count): trait_methods = parse_traits_info() - avm_class.method_names.update(trait_methods.items()) - avm_class.method_idxs.update(dict( - (idx, name) - for name, idx in trait_methods.items())) + avm_class.register_methods(trait_methods) # Scripts script_count = u30() @@ -358,12 +352,14 @@ class SWFInterpreter(object): raise ExtractorError('Class %r not found' % class_name) def extract_function(self, avm_class, func_name): + print('Extracting %s.%s' % (avm_class.name, func_name)) if func_name in avm_class.method_pyfunctions: return avm_class.method_pyfunctions[func_name] if func_name in self._classes_by_name: return self._classes_by_name[func_name].make_object() if func_name not in avm_class.methods: - raise ExtractorError('Cannot find function %r' % func_name) + raise ExtractorError('Cannot find function %s.%s' % ( + avm_class.name, func_name)) m = avm_class.methods[func_name] def resfunc(args): @@ -375,7 +371,8 @@ class SWFInterpreter(object): print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) registers = [avm_class.variables] + list(args) + [None] * m.local_count stack = [] - scopes = collections.deque([avm_class.variables]) + scopes = collections.deque([ + self._classes_by_name, avm_class.variables]) while True: opcode = _read_byte(coder) print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) @@ -408,33 +405,38 @@ class SWFInterpreter(object): args = list(reversed( [stack.pop() for _ in range(arg_count)])) obj = stack.pop() - if mname == 'split': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, compat_str) - if args[0] == '': - res = list(obj) - else: - res = obj.split(args[0]) + + if isinstance(obj, _AVMClass_Object): + func = self.extract_function(obj.avm_class, mname) + res = func(args) stack.append(res) - elif mname == 'slice': - assert len(args) == 1 - assert isinstance(args[0], int) - assert isinstance(obj, list) - res = obj[args[0]:] - stack.append(res) - elif mname == 'join': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - assert isinstance(obj, list) - res = args[0].join(obj) - stack.append(res) - elif mname in avm_class.method_pyfunctions: - stack.append(avm_class.method_pyfunctions[mname](args)) - else: - raise NotImplementedError( - 'Unsupported property %r on %r' - % (mname, obj)) + continue + elif isinstance(obj, compat_str): + if mname == 'split': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + if args[0] == '': + res = list(obj) + else: + res = obj.split(args[0]) + stack.append(res) + continue + elif isinstance(obj, list): + if mname == 'slice': + assert len(args) == 1 + assert isinstance(args[0], int) + res = obj[args[0]:] + stack.append(res) + continue + elif mname == 'join': + assert len(args) == 1 + assert isinstance(args[0], compat_str) + res = args[0].join(obj) + stack.append(res) + continue + raise NotImplementedError( + 'Unsupported property %r on %r' + % (mname, obj)) elif opcode == 72: # returnvalue res = stack.pop() return res @@ -446,11 +448,12 @@ class SWFInterpreter(object): obj = stack.pop() mname = self.multinames[index] + assert isinstance(obj, _AVMClass) construct_method = self.extract_function( - obj.avm_class, mname) + obj, mname) # We do not actually call the constructor for now; # we just pretend it does nothing - stack.append(obj) + stack.append(obj.make_object()) elif opcode == 79: # callpropvoid index = u30() mname = self.multinames[index] @@ -481,7 +484,7 @@ class SWFInterpreter(object): break else: res = scopes[0] - stack.append(res) + stack.append(res[mname]) elif opcode == 94: # findproperty index = u30() mname = self.multinames[index] @@ -490,7 +493,7 @@ class SWFInterpreter(object): res = s break else: - res = scopes[0] + res = avm_class.variables stack.append(res) elif opcode == 96: # getlex index = u30() @@ -500,7 +503,7 @@ class SWFInterpreter(object): scope = s break else: - scope = scopes[0] + scope = avm_class.variables # I cannot find where static variables are initialized # so let's just return None res = scope.get(mname) From 0d989011fffd768116d0ca81f6c067c7e0876f36 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 14:49:10 +0200 Subject: [PATCH 272/340] [swfinterp] Add support for calling methods on objects --- test/swftests/PrivateCall.as | 21 +++++++++++++++++++++ youtube_dl/swfinterp.py | 31 ++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 11 deletions(-) create mode 100644 test/swftests/PrivateCall.as diff --git a/test/swftests/PrivateCall.as b/test/swftests/PrivateCall.as new file mode 100644 index 000000000..f1c110a37 --- /dev/null +++ b/test/swftests/PrivateCall.as @@ -0,0 +1,21 @@ +// input: [] +// output: 9 + +package { +public class PrivateCall { + public static function main():int{ + var f:OtherClass = new OtherClass(); + return f.func(); + } +} +} + +class OtherClass { + private function pf():int { + return 9; + } + + public function func():int { + return this.pf(); + } +} diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 8ccb64c9d..d043c2f99 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -50,6 +50,17 @@ class _AVMClass_Object(object): return '%s#%x' % (self.avm_class.name, id(self)) +class _ScopeDict(dict): + def __init__(self, avm_class): + super(_ScopeDict, self).__init__() + self.avm_class = avm_class + + def __repr__(self): + return '%s__Scope(%s)' % ( + self.avm_class.name, + super(_ScopeDict, self).__repr__()) + + class _AVMClass(object): def __init__(self, name_idx, name): self.name_idx = name_idx @@ -59,17 +70,7 @@ class _AVMClass(object): self.methods = {} self.method_pyfunctions = {} - class ScopeDict(dict): - def __init__(self, avm_class): - super(ScopeDict, self).__init__() - self.avm_class = avm_class - - def __repr__(self): - return '%s__Scope(%s)' % ( - self.avm_class.name, - super(ScopeDict, self).__repr__()) - - self.variables = ScopeDict(self) + self.variables = _ScopeDict(self) def make_object(self): return _AVMClass_Object(self) @@ -411,6 +412,14 @@ class SWFInterpreter(object): res = func(args) stack.append(res) continue + elif isinstance(obj, _ScopeDict): + if mname in obj.avm_class.method_names: + func = self.extract_function(obj.avm_class, mname) + res = func(args) + else: + res = obj[mname] + stack.append(res) + continue elif isinstance(obj, compat_str): if mname == 'split': assert len(args) == 1 From decf2ae400d52e98bcd073a69b24b3dbf3d38d53 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 18:28:49 +0200 Subject: [PATCH 273/340] [swfinterp] Correct array access --- test/swftests/ArrayAccess.as | 19 +++++++++++++++++++ youtube_dl/swfinterp.py | 20 +++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 test/swftests/ArrayAccess.as diff --git a/test/swftests/ArrayAccess.as b/test/swftests/ArrayAccess.as new file mode 100644 index 000000000..e22caa386 --- /dev/null +++ b/test/swftests/ArrayAccess.as @@ -0,0 +1,19 @@ +// input: [["a", "b", "c", "d"]] +// output: ["c", "b", "a", "d"] + +package { +public class ArrayAccess { + public static function main(ar:Array):Array { + var aa:ArrayAccess = new ArrayAccess(); + return aa.f(ar, 2); + } + + private function f(ar:Array, num:Number):Array{ + var x:String = ar[0]; + var y:String = ar[num % ar.length]; + ar[0] = y; + ar[num] = x; + return ar; + } +} +} diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index d043c2f99..812ee7e8c 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -85,6 +85,14 @@ class _AVMClass(object): for name, idx in methods.items())) +class _Multiname(object): + def __init__(self, kind): + self.kind = kind + + def __repr__(self): + return '[MULTINAME kind: 0x%x]' % self.kind + + def _read_int(reader): res = 0 shift = 0 @@ -205,7 +213,7 @@ class SWFInterpreter(object): name_idx = u30() self.multinames.append(self.constant_strings[name_idx]) else: - self.multinames.append('[MULTINAME kind: %d]' % kind) + self.multinames.append(_Multiname(kind)) for _c2 in range(MULTINAME_SIZES[kind]): u30() @@ -399,6 +407,13 @@ class SWFInterpreter(object): elif opcode == 48: # pushscope new_scope = stack.pop() scopes.append(new_scope) + elif opcode == 66: # construct + arg_count = u30() + args = list(reversed( + [stack.pop() for _ in range(arg_count)])) + obj = stack.pop() + res = obj.avm_class.make_object() + stack.append(res) elif opcode == 70: # callproperty index = u30() mname = self.multinames[index] @@ -521,7 +536,10 @@ class SWFInterpreter(object): index = u30() value = stack.pop() idx = self.multinames[index] + if isinstance(idx, _Multiname): + idx = stack.pop() obj = stack.pop() + print('Setting %r.%r = %r' % (obj, idx, value)) obj[idx] = value elif opcode == 98: # getlocal index = u30() From 1b38b5be867a01808390ee320fa03c6512177a9b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 18:29:09 +0200 Subject: [PATCH 274/340] [swfinterp] Remove debugging code --- youtube_dl/swfinterp.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 812ee7e8c..79d86152d 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -361,7 +361,6 @@ class SWFInterpreter(object): raise ExtractorError('Class %r not found' % class_name) def extract_function(self, avm_class, func_name): - print('Extracting %s.%s' % (avm_class.name, func_name)) if func_name in avm_class.method_pyfunctions: return avm_class.method_pyfunctions[func_name] if func_name in self._classes_by_name: @@ -377,14 +376,12 @@ class SWFInterpreter(object): s24 = lambda: _s24(coder) u30 = lambda: _u30(coder) - print('Invoking %s.%s(%r)' % (avm_class.name, func_name, tuple(args))) registers = [avm_class.variables] + list(args) + [None] * m.local_count stack = [] scopes = collections.deque([ self._classes_by_name, avm_class.variables]) while True: opcode = _read_byte(coder) - print('opcode: %r, stack(%d): %r' % (opcode, len(stack), stack)) if opcode == 17: # iftrue offset = s24() value = stack.pop() @@ -539,7 +536,6 @@ class SWFInterpreter(object): if isinstance(idx, _Multiname): idx = stack.pop() obj = stack.pop() - print('Setting %r.%r = %r' % (obj, idx, value)) obj[idx] = value elif opcode == 98: # getlocal index = u30() From 7fd48d0413f1325619562a0ad0580e4a7fff34e1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 18:30:27 +0200 Subject: [PATCH 275/340] [youtube] Correct signature testcase --- test/test_youtube_signature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index e443e0be8..609e7078c 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -49,7 +49,7 @@ _TESTS = [ u'http://s.ytimg.com/yts/swfbin/player-vfl5vIhK2/watch_as3.swf', u'swf', 86, - u'23456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!?#$%&\'()*+,-./:;<=>"' + u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?' ), ] From cceb5ec2370ea1de950520937361c3f9ed4fe12a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 18:47:03 +0200 Subject: [PATCH 276/340] release 2014.07.20 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 4d606c3d2..0214086d0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.15' +__version__ = '2014.07.20' From a5d524ef4676062335303ad249cc35a1a237ad07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 21 Jul 2014 00:28:55 +0700 Subject: [PATCH 277/340] [allocine] Update tests --- youtube_dl/extractor/allocine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py index 34f0cd49b..7bd797884 100644 --- a/youtube_dl/extractor/allocine.py +++ b/youtube_dl/extractor/allocine.py @@ -32,7 +32,7 @@ class AllocineIE(InfoExtractor): 'id': '19540403', 'ext': 'mp4', 'title': 'Planes 2 Bande-annonce VF', - 'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', + 'description': 'md5:eeaffe7c2d634525e21159b93acf3b1e', 'thumbnail': 're:http://.*\.jpg', }, }, { @@ -42,7 +42,7 @@ class AllocineIE(InfoExtractor): 'id': '19544709', 'ext': 'mp4', 'title': 'Dragons 2 - Bande annonce finale VF', - 'description': 'md5:e74a4dc750894bac300ece46c7036490', + 'description': 'md5:71742e3a74b0d692c7fce0dd2017a4ac', 'thumbnail': 're:http://.*\.jpg', }, }] From b8c74d606aff4a325cdcc951df99495690f40054 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 20:20:42 +0200 Subject: [PATCH 278/340] [youtube] fix display of swf player id --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 623056bd9..6e77504bf 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -834,7 +834,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: if player_url.endswith('swf'): player_version = self._search_regex( - r'-(.+)\.swf$', player_url, + r'-(.+?)(?:/watch_as3)?\.swf$', player_url, u'flash player', fatal=False) player_desc = 'flash player %s' % player_version else: From b6ea11b9675fadd6961fb46ea35bfb88df902ae5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 20:45:36 +0200 Subject: [PATCH 279/340] [youtube] Add swf signature test case (#3270) --- test/test_youtube_signature.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 609e7078c..e8a67c4c0 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -51,6 +51,12 @@ _TESTS = [ 86, u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVWXY\\!"#$%&\'()*+,-./:;<=>?' ), + ( + u'http://s.ytimg.com/yts/swfbin/player-vflmDyk47/watch_as3.swf', + u'swf', + u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9', + u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F' + ), ] From 2c57c7fa5a1871d750ded6610c4fd6ee55bd96a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 20 Jul 2014 21:05:02 +0200 Subject: [PATCH 280/340] [youtube] Fix extraction of age gate videos (closes #3270) Setting the correct value of the 'sts' paramater in the 'get_video_info' url gives the correct urls. Removed parameters that are not needed. --- youtube_dl/extractor/youtube.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6e77504bf..071aa7519 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -609,14 +609,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} # this can be viewed without login into Youtube - data = compat_urllib_parse.urlencode({'video_id': video_id, - 'el': 'player_embedded', - 'gl': 'US', - 'hl': 'en', - 'eurl': 'https://youtube.googleapis.com/v/' + video_id, - 'asv': 3, - 'sts':'1588', - }) + data = compat_urllib_parse.urlencode({ + 'video_id': video_id, + 'eurl': 'https://youtube.googleapis.com/v/' + video_id, + 'sts':'16268', + }) video_info_url = proto + '://www.youtube.com/get_video_info?' + data video_info_webpage = self._download_webpage(video_info_url, video_id, note=False, From 29546b345b4926e46f1259a8ea08826009baf1a9 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 21:38:02 +0200 Subject: [PATCH 281/340] [ard] Add support for NDR-style videos (fixes #3281) --- youtube_dl/extractor/ard.py | 103 ++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index b36a4d46a..30a85c8c1 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -7,23 +7,32 @@ from .common import InfoExtractor from ..utils import ( determine_ext, ExtractorError, + qualities, ) class ARDIE(InfoExtractor): - _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[^/\?]+)(?:\?.*)?' + _VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.daserste\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' - _TEST = { - 'url': 'http://www.ardmediathek.de/das-erste/guenther-jauch/edward-snowden-im-interview-held-oder-verraeter?documentId=19288786', - 'file': '19288786.mp4', - 'md5': '515bf47ce209fb3f5a61b7aad364634c', + _TESTS = [{ + 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', + 'file': '22429276.mp4', + 'md5': '469751912f1de0816a9fc9df8336476c', 'info_dict': { - 'title': 'Edward Snowden im Interview - Held oder Verräter?', - 'description': 'Edward Snowden hat alles aufs Spiel gesetzt, um die weltweite \xdcberwachung durch die Geheimdienste zu enttarnen. Nun stellt sich der ehemalige NSA-Mitarbeiter erstmals weltweit in einem TV-Interview den Fragen eines NDR-Journalisten. Die Sendung vom Sonntagabend.', - 'thumbnail': 'http://www.ardmediathek.de/ard/servlet/contentblob/19/28/87/90/19288790/bild/2250037', + 'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?', + 'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014', }, 'skip': 'Blocked outside of Germany', - } + }, { + 'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916', + 'info_dict': { + 'id': '22490580', + 'ext': 'mp4', + 'title': 'Das Wunder von Wolbeck (Video tgl. ab 20 Uhr)', + 'description': 'Auf einem restaurierten Hof bei Wolbeck wird der Heilpraktiker Raffael Lembeck eines morgens von seiner Frau Stella tot aufgefunden. Das Opfer war offensichtlich in seiner Praxis zu Fall gekommen und ist dann verblutet, erklärt Prof. Boerne am Tatort.', + }, + 'skip': 'Blocked outside of Germany', + }] def _real_extract(self, url): # determine video id from url @@ -43,40 +52,64 @@ class ARDIE(InfoExtractor): r'<h4 class="headline">(.*?)</h4>'], webpage, 'title') description = self._html_search_meta( - 'dcterms.abstract', webpage, 'description') - thumbnail = self._og_search_thumbnail(webpage) + 'dcterms.abstract', webpage, 'description', default=None) + if description is None: + description = self._html_search_meta( + 'description', webpage, 'meta description') + # Thumbnail is sometimes not present. + # It is in the mobile version, but that seems to use a different URL + # structure altogether. + thumbnail = self._og_search_thumbnail(webpage, default=None) - media_info = self._download_json( - 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) - # The second element of the _mediaArray contains the standard http urls - streams = media_info['_mediaArray'][1]['_mediaStreamArray'] - if not streams: - if '"fsk"' in webpage: - raise ExtractorError('This video is only available after 20:00') + media_streams = re.findall(r'''(?x) + mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s* + "([^"]+)"''', webpage) - formats = [] + if media_streams: + QUALITIES = qualities(['lo', 'hi', 'hq']) + formats = [] + for furl in set(media_streams): + if furl.endswith('.f4m'): + fid = 'f4m' + else: + fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl) + fid = fid_m.group(1) if fid_m else None + formats.append({ + 'quality': QUALITIES(fid), + 'format_id': fid, + 'url': furl, + }) + else: # request JSON file + media_info = self._download_json( + 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) + # The second element of the _mediaArray contains the standard http urls + streams = media_info['_mediaArray'][1]['_mediaStreamArray'] + if not streams: + if '"fsk"' in webpage: + raise ExtractorError('This video is only available after 20:00') - for s in streams: - if type(s['_stream']) == list: - for index, url in enumerate(s['_stream'][::-1]): - quality = s['_quality'] + index - formats.append({ - 'quality': quality, - 'url': url, - 'format_id': '%s-%s' % (determine_ext(url), quality) + formats = [] + for s in streams: + if type(s['_stream']) == list: + for index, url in enumerate(s['_stream'][::-1]): + quality = s['_quality'] + index + formats.append({ + 'quality': quality, + 'url': url, + 'format_id': '%s-%s' % (determine_ext(url), quality) }) - continue + continue - format = { - 'quality': s['_quality'], - 'url': s['_stream'], - } + format = { + 'quality': s['_quality'], + 'url': s['_stream'], + } - format['format_id'] = '%s-%s' % ( - determine_ext(format['url']), format['quality']) + format['format_id'] = '%s-%s' % ( + determine_ext(format['url']), format['quality']) - formats.append(format) + formats.append(format) self._sort_formats(formats) From f3308e138df3458581c8824d0d3eaa46b4b1a6d1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 21:38:29 +0200 Subject: [PATCH 282/340] release 2014.07.20.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0214086d0..c68062059 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.20' +__version__ = '2014.07.20.1' From c13bf7c836e1befb28070fe393e474566a43409a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 23:20:15 +0200 Subject: [PATCH 283/340] [swfinterp] Use helper function struct_unpack for old Python 2.x releases (#3270) --- youtube_dl/swfinterp.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 79d86152d..87ec7bcff 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -2,12 +2,12 @@ from __future__ import unicode_literals import collections import io -import struct import zlib from .utils import ( compat_str, ExtractorError, + struct_unpack, ) @@ -23,17 +23,17 @@ def _extract_tags(file_contents): file_contents[:1]) # Determine number of bits in framesize rectangle - framesize_nbits = struct.unpack('!B', content[:1])[0] >> 3 + framesize_nbits = struct_unpack('!B', content[:1])[0] >> 3 framesize_len = (5 + 4 * framesize_nbits + 7) // 8 pos = framesize_len + 2 + 2 while pos < len(content): - header16 = struct.unpack('<H', content[pos:pos + 2])[0] + header16 = struct_unpack('<H', content[pos:pos + 2])[0] pos += 2 tag_code = header16 >> 6 tag_len = header16 & 0x3f if tag_len == 0x3f: - tag_len = struct.unpack('<I', content[pos:pos + 4])[0] + tag_len = struct_unpack('<I', content[pos:pos + 4])[0] pos += 4 assert pos + tag_len <= len(content), \ ('Tag %d ends at %d+%d - that\'s longer than the file (%d)' @@ -99,7 +99,7 @@ def _read_int(reader): for _ in range(5): buf = reader.read(1) assert len(buf) == 1 - b = struct.unpack('<B', buf)[0] + b = struct_unpack('<B', buf)[0] res = res | ((b & 0x7f) << shift) if b & 0x80 == 0: break @@ -125,7 +125,7 @@ def _s24(reader): bs = reader.read(3) assert len(bs) == 3 last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00' - return struct.unpack('<i', bs + last_byte)[0] + return struct_unpack('<i', bs + last_byte)[0] def _read_string(reader): @@ -144,7 +144,7 @@ def _read_bytes(count, reader): def _read_byte(reader): resb = _read_bytes(1, reader=reader) - res = struct.unpack('<B', resb)[0] + res = struct_unpack('<B', resb)[0] return res From 727d2930f25164bff9a066f78ff21643f6e70ec8 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 23:23:01 +0200 Subject: [PATCH 284/340] release 2014.07.20.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c68062059..e2e0ee25c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.20.1' +__version__ = '2014.07.20.2' From 72e785f36ad215d48915b9207c403c5ce54cc9da Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 23:34:20 +0200 Subject: [PATCH 285/340] [livestream] PEP8 --- youtube_dl/extractor/livestream.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py index 2c100d424..1ea1bbab4 100644 --- a/youtube_dl/extractor/livestream.py +++ b/youtube_dl/extractor/livestream.py @@ -28,11 +28,13 @@ class LivestreamIE(InfoExtractor): } def _extract_video_info(self, video_data): - video_url = video_data.get('progressive_url_hd') or video_data.get('progressive_url') + video_url = ( + video_data.get('progressive_url_hd') or + video_data.get('progressive_url') + ) return { 'id': compat_str(video_data['id']), 'url': video_url, - 'ext': 'mp4', 'title': video_data['caption'], 'thumbnail': video_data['thumbnail_url'], 'upload_date': video_data['updated_at'].replace('-', '')[:8], @@ -50,7 +52,8 @@ class LivestreamIE(InfoExtractor): r'window.config = ({.*?});', webpage, 'window config') info = json.loads(config_json)['event'] videos = [self._extract_video_info(video_data['data']) - for video_data in info['feed']['data'] if video_data['type'] == 'video'] + for video_data in info['feed']['data'] + if video_data['type'] == 'video'] return self.playlist_result(videos, info['id'], info['full_name']) else: og_video = self._og_search_video_url(webpage, 'player url') From 351f3738656899afa625ef3bdf640aeeacae00ff Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 23:36:21 +0200 Subject: [PATCH 286/340] [swfinterp] Fix _u32 name --- youtube_dl/swfinterp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index 87ec7bcff..a6f4ba6e0 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -111,7 +111,7 @@ def _u30(reader): res = _read_int(reader) assert res & 0xf0000000 == 0 return res -u32 = _read_int +_u32 = _read_int def _s32(reader): From 7fbf54dc62b43884d49d1d96854dc82a38b8b42f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 23:37:10 +0200 Subject: [PATCH 287/340] [swfinterp] Remove (at the moment) dead code --- youtube_dl/swfinterp.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/swfinterp.py b/youtube_dl/swfinterp.py index a6f4ba6e0..b63c65b20 100644 --- a/youtube_dl/swfinterp.py +++ b/youtube_dl/swfinterp.py @@ -470,8 +470,7 @@ class SWFInterpreter(object): mname = self.multinames[index] assert isinstance(obj, _AVMClass) - construct_method = self.extract_function( - obj, mname) + # We do not actually call the constructor for now; # we just pretend it does nothing stack.append(obj.make_object()) From 246168bd72a8f031adb243e9465ef52f62fb502c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Sun, 20 Jul 2014 23:38:44 +0200 Subject: [PATCH 288/340] Remove unused imports --- youtube_dl/extractor/firedrive.py | 1 - youtube_dl/extractor/tenplay.py | 2 -- youtube_dl/extractor/youtube.py | 3 --- 3 files changed, 6 deletions(-) diff --git a/youtube_dl/extractor/firedrive.py b/youtube_dl/extractor/firedrive.py index d26145db1..6d73c8a4a 100644 --- a/youtube_dl/extractor/firedrive.py +++ b/youtube_dl/extractor/firedrive.py @@ -8,7 +8,6 @@ from ..utils import ( ExtractorError, compat_urllib_parse, compat_urllib_request, - determine_ext, ) diff --git a/youtube_dl/extractor/tenplay.py b/youtube_dl/extractor/tenplay.py index 8477840fc..81ba169fb 100644 --- a/youtube_dl/extractor/tenplay.py +++ b/youtube_dl/extractor/tenplay.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 071aa7519..072e711c2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1,15 +1,12 @@ # coding: utf-8 -import collections import errno import io import itertools import json import os.path import re -import struct import traceback -import zlib from .common import InfoExtractor, SearchInfoExtractor from .subtitles import SubtitlesInfoExtractor From eef4a7a3042914e4cad6d46a90308567f012ae59 Mon Sep 17 00:00:00 2001 From: "Anthony J. Bentley" <anthony@cathet.us> Date: Sun, 20 Jul 2014 18:37:44 -0600 Subject: [PATCH 289/340] =?UTF-8?q?Fix=20typo:=20=E2=80=9Cytseach=E2=80=9D?= =?UTF-8?q?=20=E2=86=92=20=E2=80=9Cytsearch=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f97b59845..9db27f9aa 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -402,7 +402,7 @@ class GenericIE(InfoExtractor): elif default_search == 'error': raise ExtractorError( ('%r is not a valid URL. ' - 'Set --default-search "ytseach" (or run youtube-dl "ytsearch:%s" ) to search YouTube' + 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube' ) % (url, url), expected=True) else: assert ':' in default_search From 9732d77ed273406afcf9ed3ccb4d109824c9c69d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:02:44 +0200 Subject: [PATCH 290/340] [snotr] PEP8 and minor fixes (#3296) --- youtube_dl/YoutubeDL.py | 4 +++ youtube_dl/extractor/common.py | 2 ++ youtube_dl/extractor/snotr.py | 55 +++++++++++++++------------------- youtube_dl/utils.py | 22 +++++++------- 4 files changed, 41 insertions(+), 42 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 3dff723b8..686988fe5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1197,6 +1197,10 @@ class YoutubeDL(object): if res: res += ', ' res += format_bytes(fdict['filesize']) + elif fdict.get('filesize_approx') is not None: + if res: + res += ', ' + res += '~' + format_bytes(fdict['filesize_approx']) return res def list_formats(self, info_dict): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e68657314..3213abacf 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -69,6 +69,7 @@ class InfoExtractor(object): * vcodec Name of the video codec in use * container Name of the container format * filesize The number of bytes, if known in advance + * filesize_approx An estimate for the number of bytes * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual download, lower-case. @@ -555,6 +556,7 @@ class InfoExtractor(object): f.get('abr') if f.get('abr') is not None else -1, audio_ext_preference, f.get('filesize') if f.get('filesize') is not None else -1, + f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, f.get('format_id'), ) formats.sort(key=_formats_key) diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index f89e81bf3..e762ad8f6 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -4,49 +4,39 @@ from __future__ import unicode_literals import re from .common import InfoExtractor - from ..utils import ( - + float_or_none, str_to_int, - parse_iso8601, - - - + parse_duration, ) + class SnotrIE(InfoExtractor): _VALID_URL = r'http?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)' - _TESTS =[ { + _TESTS = [{ 'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks', 'info_dict': { 'id': '13708', 'ext': 'flv', 'title': 'Drone flying through fireworks!', 'duration': 247, - 'filesize':12320768 - } - }, - - - - { - + 'filesize_approx': 98566144, + } + }, { 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', 'info_dict': { 'id': '530', 'ext': 'flv', 'title': 'David Letteman - George W. Bush Top 10', 'duration': 126, - 'filesize': 1048576 - } - }] - + 'filesize_approx': 8912896, + } + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - # TODO more code goes here, for example ... webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage) @@ -54,20 +44,23 @@ class SnotrIE(InfoExtractor): video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id - view_count = str_to_int(self._html_search_regex(r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>',webpage,'view count')) + view_count = str_to_int(self._html_search_regex( + r'<p>\n<strong>Views:</strong>\n([\d,\.]+)</p>', + webpage, 'view count', fatal=False)) - duration = self._html_search_regex(r'<p>\n<strong>Length:</strong>\n(.*?)</p>',webpage,'duration') - duration = str_to_int(duration[:1])*60 + str_to_int(duration[2:4]) + duration = parse_duration(self._html_search_regex( + r'<p>\n<strong>Length:</strong>\n\s*([0-9:]+).*?</p>', + webpage, 'duration', fatal=False)) - file_size = self._html_search_regex(r'<p>\n<strong>Filesize:</strong>\n(.*?)</p>',webpage,'filesize') - file_size = str_to_int(re.match(r'\d+',file_size).group())*131072 + filesize_approx = float_or_none(self._html_search_regex( + r'<p>\n<strong>Filesize:</strong>\n\s*([0-9.]+)\s*megabyte</p>', + webpage, 'filesize', fatal=False), invscale=1024 * 1024) return { 'id': video_id, 'title': title, - 'url':video_url, - 'view_count':view_count, - 'duration':duration, - 'filesize':file_size - - } \ No newline at end of file + 'url': video_url, + 'view_count': view_count, + 'duration': duration, + 'filesize_approx': filesize_approx, + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 919603c62..bf4d1112f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1193,13 +1193,6 @@ def format_bytes(bytes): return u'%.2f%s' % (converted, suffix) -def str_to_int(int_str): - if int_str is None: - return None - int_str = re.sub(r'[,\.]', u'', int_str) - return int(int_str) - - def get_term_width(): columns = os.environ.get('COLUMNS', None) if columns: @@ -1267,15 +1260,22 @@ class HEADRequest(compat_urllib_request.Request): return "HEAD" -def int_or_none(v, scale=1, default=None, get_attr=None): +def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): if get_attr: if v is not None: v = getattr(v, get_attr, None) - return default if v is None else (int(v) // scale) + return default if v is None else (int(v) * invscale // scale) -def float_or_none(v, scale=1, default=None): - return default if v is None else (float(v) / scale) +def str_to_int(int_str): + if int_str is None: + return None + int_str = re.sub(r'[,\.]', u'', int_str) + return int(int_str) + + +def float_or_none(v, scale=1, invscale=1, default=None): + return default if v is None else (float(v) * invscale / scale) def parse_duration(s): From 54330a1c3c3d4f3c4ce520e0deeece68120c3051 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:07:26 +0200 Subject: [PATCH 291/340] [swfinterp] Fix imports --- test/test_swfinterp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py index 3bb5a6308..b42cd74c7 100644 --- a/test/test_swfinterp.py +++ b/test/test_swfinterp.py @@ -7,6 +7,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import errno import io import json import re From da8fb85859964d9a1d21a0328eb9044e19499d9c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:08:44 +0200 Subject: [PATCH 292/340] [snotr] Add description --- youtube_dl/extractor/snotr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py index e762ad8f6..da3b05a8d 100644 --- a/youtube_dl/extractor/snotr.py +++ b/youtube_dl/extractor/snotr.py @@ -21,6 +21,7 @@ class SnotrIE(InfoExtractor): 'title': 'Drone flying through fireworks!', 'duration': 247, 'filesize_approx': 98566144, + 'description': 'A drone flying through Fourth of July Fireworks', } }, { 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', @@ -30,6 +31,7 @@ class SnotrIE(InfoExtractor): 'title': 'David Letteman - George W. Bush Top 10', 'duration': 126, 'filesize_approx': 8912896, + 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!', } }] @@ -41,7 +43,6 @@ class SnotrIE(InfoExtractor): title = self._og_search_title(webpage) description = self._og_search_description(webpage) - video_url = "http://cdn.videos.snotr.com/%s.flv" % video_id view_count = str_to_int(self._html_search_regex( @@ -58,6 +59,7 @@ class SnotrIE(InfoExtractor): return { 'id': video_id, + 'description': description, 'title': title, 'url': video_url, 'view_count': view_count, From db964a33a1c8ec0449fe2e39cf8d5de70daaffc2 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:12:50 +0200 Subject: [PATCH 293/340] Remove unused imports --- youtube_dl/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f223b75f4..0e7b9ddaf 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -72,11 +72,9 @@ __license__ = 'Public Domain' import codecs import io -import locale import optparse import os import random -import re import shlex import sys From 9aeaf730ad712aed29d241d4e6655b8e5fee1d47 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:14:06 +0200 Subject: [PATCH 294/340] [rtve] Fix md5sum Looks like these guys reencoded the video. --- youtube_dl/extractor/rtve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 77fd08dde..c2228b2f0 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -17,7 +17,7 @@ class RTVEALaCartaIE(InfoExtractor): _TEST = { 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', - 'md5': '18fcd45965bdd076efdb12cd7f6d7b9e', + 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', 'info_dict': { 'id': '2491869', 'ext': 'mp4', From 468d19a9c15f1a3ddd5363c4a966667d777782b0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:15:23 +0200 Subject: [PATCH 295/340] [savefrom] Fix test description --- youtube_dl/extractor/savefrom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dl/extractor/savefrom.py index 198a08c1c..ccd545971 100644 --- a/youtube_dl/extractor/savefrom.py +++ b/youtube_dl/extractor/savefrom.py @@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor): 'upload_date': '20120816', 'uploader': 'Howcast', 'uploader_id': 'Howcast', - 'description': 'md5:4f0aac94361a12e1ce57d74f85265175', + 'description': 'md5:727900f130df3dc9a25e2721497c7910', }, 'params': { 'skip_download': True From 4f95d455edf20583abd85801c23e88fa749be237 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:17:44 +0200 Subject: [PATCH 296/340] [steam] Update test description --- youtube_dl/extractor/steam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py index af689e2c2..183dcb03c 100644 --- a/youtube_dl/extractor/steam.py +++ b/youtube_dl/extractor/steam.py @@ -53,7 +53,7 @@ class SteamIE(InfoExtractor): 'ext': 'mp4', 'upload_date': '20140329', 'title': 'FRONTIERS - Final Greenlight Trailer', - 'description': 'md5:6df4fe8dd494ae811869672b0767e025', + 'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9', 'uploader': 'AAD Productions', 'uploader_id': 'AtomicAgeDogGames', } From d8624e6a80751c09a48ff6b9db1d4d85e377c437 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:25:49 +0200 Subject: [PATCH 297/340] [test_playlist] Add and use assertGreaterEqual --- test/helper.py | 7 +++++++ test/test_playlists.py | 47 +++++++++++++++++++++--------------------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/test/helper.py b/test/helper.py index 230d2bd67..84b16f770 100644 --- a/test/helper.py +++ b/test/helper.py @@ -148,3 +148,10 @@ def assertRegexpMatches(self, text, regexp, msg=None): else: msg = note + ', ' + msg self.assertTrue(m, msg) + + +def assertGreaterEqual(self, got, expected, msg=None): + if not (got >= expected): + if msg is None: + msg = '%r not greater than or equal to %r' % (got, expected) + self.assertTrue(got >= expected, msg) diff --git a/test/test_playlists.py b/test/test_playlists.py index 1a38a667b..4789200e9 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( assertRegexpMatches, + assertGreaterEqual, expect_info_dict, FakeYDL, ) @@ -71,8 +72,8 @@ class TestPlaylists(unittest.TestCase): ie = DailymotionUserIE(dl) result = ie.extract('https://www.dailymotion.com/user/nqtv') self.assertIsPlaylist(result) + assertGreaterEqual(self, len(result['entries']), 100) self.assertEqual(result['title'], 'Rémi Gaillard') - self.assertTrue(len(result['entries']) >= 100) def test_vimeo_channel(self): dl = FakeYDL() @@ -111,7 +112,7 @@ class TestPlaylists(unittest.TestCase): ie = VineUserIE(dl) result = ie.extract('https://vine.co/Visa') self.assertIsPlaylist(result) - self.assertTrue(len(result['entries']) >= 47) + assertGreaterEqual(self, len(result['entries']), 47) def test_ustream_channel(self): dl = FakeYDL() @@ -119,7 +120,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://www.ustream.tv/channel/channeljapan') self.assertIsPlaylist(result) self.assertEqual(result['id'], '10874166') - self.assertTrue(len(result['entries']) >= 54) + assertGreaterEqual(self, len(result['entries']), 54) def test_soundcloud_set(self): dl = FakeYDL() @@ -127,7 +128,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'The Royal Concept EP') - self.assertTrue(len(result['entries']) >= 6) + assertGreaterEqual(self, len(result['entries']), 6) def test_soundcloud_user(self): dl = FakeYDL() @@ -135,7 +136,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('https://soundcloud.com/the-concept-band') self.assertIsPlaylist(result) self.assertEqual(result['id'], '9615865') - self.assertTrue(len(result['entries']) >= 12) + assertGreaterEqual(self, len(result['entries']), 12) def test_soundcloud_likes(self): dl = FakeYDL() @@ -143,7 +144,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('https://soundcloud.com/the-concept-band/likes') self.assertIsPlaylist(result) self.assertEqual(result['id'], '9615865') - self.assertTrue(len(result['entries']) >= 1) + assertGreaterEqual(self, len(result['entries']), 1) def test_soundcloud_playlist(self): dl = FakeYDL() @@ -162,7 +163,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://new.livestream.com/tedx/cityenglish') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'TEDCity2.0 (English)') - self.assertTrue(len(result['entries']) >= 4) + assertGreaterEqual(self, len(result['entries']), 4) def test_livestreamoriginal_folder(self): dl = FakeYDL() @@ -170,7 +171,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') - self.assertTrue(len(result['entries']) >= 28) + assertGreaterEqual(self, len(result['entries']), 28) def test_nhl_videocenter(self): dl = FakeYDL() @@ -187,7 +188,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://bambuser.com/channel/pixelversity') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'pixelversity') - self.assertTrue(len(result['entries']) >= 60) + assertGreaterEqual(self, len(result['entries']), 60) def test_bandcamp_album(self): dl = FakeYDL() @@ -195,7 +196,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://mpallante.bandcamp.com/album/nightmare-night-ep') self.assertIsPlaylist(result) self.assertEqual(result['title'], 'Nightmare Night EP') - self.assertTrue(len(result['entries']) >= 4) + assertGreaterEqual(self, len(result['entries']), 4) def test_smotri_community(self): dl = FakeYDL() @@ -204,7 +205,7 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['id'], 'kommuna') self.assertEqual(result['title'], 'КПРФ') - self.assertTrue(len(result['entries']) >= 4) + assertGreaterEqual(self, len(result['entries']), 4) def test_smotri_user(self): dl = FakeYDL() @@ -213,7 +214,7 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['id'], 'inspector') self.assertEqual(result['title'], 'Inspector') - self.assertTrue(len(result['entries']) >= 9) + assertGreaterEqual(self, len(result['entries']), 9) def test_AcademicEarthCourse(self): dl = FakeYDL() @@ -232,7 +233,7 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['id'], 'dvoe_iz_lartsa') self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008)') - self.assertTrue(len(result['entries']) >= 24) + assertGreaterEqual(self, len(result['entries']), 24) def test_ivi_compilation_season(self): dl = FakeYDL() @@ -241,7 +242,7 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['id'], 'dvoe_iz_lartsa/season1') self.assertEqual(result['title'], 'Двое из ларца (2006 - 2008) 1 сезон') - self.assertTrue(len(result['entries']) >= 12) + assertGreaterEqual(self, len(result['entries']), 12) def test_imdb_list(self): dl = FakeYDL() @@ -260,7 +261,7 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], 'cryptography') self.assertEqual(result['title'], 'Journey into cryptography') self.assertEqual(result['description'], 'How have humans protected their secret messages through history? What has changed today?') - self.assertTrue(len(result['entries']) >= 3) + assertGreaterEqual(self, len(result['entries']), 3) def test_EveryonesMixtape(self): dl = FakeYDL() @@ -277,7 +278,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://rutube.ru/tags/video/1800/') self.assertIsPlaylist(result) self.assertEqual(result['id'], '1800') - self.assertTrue(len(result['entries']) >= 68) + assertGreaterEqual(self, len(result['entries']), 68) def test_rutube_person(self): dl = FakeYDL() @@ -285,7 +286,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://rutube.ru/video/person/313878/') self.assertIsPlaylist(result) self.assertEqual(result['id'], '313878') - self.assertTrue(len(result['entries']) >= 37) + assertGreaterEqual(self, len(result['entries']), 37) def test_multiple_brightcove_videos(self): # https://github.com/rg3/youtube-dl/issues/2283 @@ -322,7 +323,7 @@ class TestPlaylists(unittest.TestCase): self.assertIsPlaylist(result) self.assertEqual(result['id'], '10') self.assertEqual(result['title'], 'Who are the hackers?') - self.assertTrue(len(result['entries']) >= 6) + assertGreaterEqual(self, len(result['entries']), 6) def test_toypics_user(self): dl = FakeYDL() @@ -330,7 +331,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://videos.toypics.net/Mikey') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'Mikey') - self.assertTrue(len(result['entries']) >= 17) + assertGreaterEqual(self, len(result['entries']), 17) def test_xtube_user(self): dl = FakeYDL() @@ -338,7 +339,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://www.xtube.com/community/profile.php?user=greenshowers') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'greenshowers') - self.assertTrue(len(result['entries']) >= 155) + assertGreaterEqual(self, len(result['entries']), 155) def test_InstagramUser(self): dl = FakeYDL() @@ -346,7 +347,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://instagram.com/porsche') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'porsche') - self.assertTrue(len(result['entries']) >= 2) + assertGreaterEqual(self, len(result['entries']), 2) test_video = next( e for e in result['entries'] if e['id'] == '614605558512799803_462752227') @@ -385,7 +386,7 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], '152147') self.assertEqual( result['title'], 'Brace Yourself - Today\'s Weirdest News') - self.assertTrue(len(result['entries']) >= 10) + assertGreaterEqual(self, len(result['entries']), 10) def test_TeacherTubeUser(self): dl = FakeYDL() @@ -393,7 +394,7 @@ class TestPlaylists(unittest.TestCase): result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') self.assertIsPlaylist(result) self.assertEqual(result['id'], 'rbhagwati2') - self.assertTrue(len(result['entries']) >= 179) + assertGreaterEqual(self, len(result['entries']), 179) if __name__ == '__main__': unittest.main() From 1a30deca50d6256bb833aee672d5055d72319aca Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:47:01 +0200 Subject: [PATCH 298/340] [teachertube] Fix title and playlist recognition --- youtube_dl/extractor/common.py | 2 +- youtube_dl/extractor/teachertube.py | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3213abacf..9b36e0789 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -469,7 +469,7 @@ class InfoExtractor(object): display_name = name return self._html_search_regex( r'''(?ix)<meta - (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) + (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?) [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), html, display_name, fatal=fatal, **kwargs) diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dl/extractor/teachertube.py index 2c2113b14..46d727d1d 100644 --- a/youtube_dl/extractor/teachertube.py +++ b/youtube_dl/extractor/teachertube.py @@ -62,7 +62,7 @@ class TeacherTubeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - title = self._html_search_meta('title', webpage, 'title') + title = self._html_search_meta('title', webpage, 'title', fatal=True) TITLE_SUFFIX = ' - TeacherTube' if title.endswith(TITLE_SUFFIX): title = title[:-len(TITLE_SUFFIX)].strip() @@ -101,7 +101,11 @@ class TeacherTubeUserIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?' - _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">' + _MEDIA_RE = r'''(?sx) + class="?sidebar_thumb_time"?>[0-9:]+</div> + \s* + <a\s+href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)" + ''' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -111,14 +115,12 @@ class TeacherTubeUserIE(InfoExtractor): webpage = self._download_webpage(url, user_id) urls.extend(re.findall(self._MEDIA_RE, webpage)) - pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] + pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[:-1] for p in pages: more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) - webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) - urls.extend(re.findall(self._MEDIA_RE, webpage)) - - entries = [] - for url in urls: - entries.append(self.url_result(url, 'TeacherTube')) + webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages))) + video_urls = re.findall(self._MEDIA_RE, webpage) + urls.extend(video_urls) + entries = [self.url_result(vurl, 'TeacherTube') for vurl in urls] return self.playlist_result(entries, user_id) From 264a7044f59373291bae6a662b9173d55a4b9925 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 12:57:40 +0200 Subject: [PATCH 299/340] [dropbox] Fix test and add support for spaces in filenames --- youtube_dl/extractor/dropbox.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py index 41208c976..1711f0263 100644 --- a/youtube_dl/extractor/dropbox.py +++ b/youtube_dl/extractor/dropbox.py @@ -5,24 +5,26 @@ import os.path import re from .common import InfoExtractor +from ..utils import compat_urllib_parse class DropboxIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/s/(?P<id>[a-zA-Z0-9]{15})/(?P<title>[^?#]*)' _TEST = { - 'url': 'https://www.dropbox.com/s/0qr9sai2veej4f8/THE_DOCTOR_GAMES.mp4', - 'md5': '8ae17c51172fb7f93bdd6a214cc8c896', + 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4', + 'md5': '8a3d905427a6951ccb9eb292f154530b', 'info_dict': { - 'id': '0qr9sai2veej4f8', + 'id': 'nelirfsxnmcfbfh', 'ext': 'mp4', - 'title': 'THE_DOCTOR_GAMES' + 'title': 'youtube-dl test video \'ä"BaW_jenozKc' } } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - title = os.path.splitext(mobj.group('title'))[0] + fn = compat_urllib_parse.unquote(mobj.group('title')) + title = os.path.splitext(fn)[0] video_url = url + '?dl=1' return { From 6f5342a201e2568ce91454d96281165c39dae16e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 13:03:18 +0200 Subject: [PATCH 300/340] [cnet] Fix title extraction URLs are still missing --- youtube_dl/extractor/cnet.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/cnet.py b/youtube_dl/extractor/cnet.py index a94f42571..710d5009b 100644 --- a/youtube_dl/extractor/cnet.py +++ b/youtube_dl/extractor/cnet.py @@ -43,7 +43,11 @@ class CNETIE(InfoExtractor): raise ExtractorError('Cannot find video data') video_id = vdata['id'] - title = vdata['headline'] + title = vdata.get('headline') + if title is None: + title = vdata.get('title') + if title is None: + raise ExtractorError('Cannot find title!') description = vdata.get('dek') thumbnail = vdata.get('image', {}).get('path') author = vdata.get('author') From 0e6ebc13d154e6b8b063dfe7e9c2dd28d427fb77 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 13:11:24 +0200 Subject: [PATCH 301/340] [vimeo] Update test description --- youtube_dl/extractor/vimeo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 255855558..a3c6e83b0 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -98,7 +98,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 'info_dict': { 'id': '54469442', 'ext': 'mp4', - 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software', + 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012', 'uploader': 'The BLN & Business of Software', 'uploader_id': 'theblnbusinessofsoftware', 'duration': 3610, From a850fde1d82d86ed5b75e6f7e1f2e43817946290 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 13:14:41 +0200 Subject: [PATCH 302/340] [funnyordie] Fix test description --- youtube_dl/extractor/funnyordie.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 6e6b66660..721e5fce0 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -26,7 +26,7 @@ class FunnyOrDieIE(InfoExtractor): 'id': 'e402820827', 'ext': 'mp4', 'title': 'Please Use This Song (Jon Lajoie)', - 'description': 'md5:2ed27d364f5a805a6dba199faaf6681d', + 'description': 'Please use this to sell something. www.jonlajoie.com', 'thumbnail': 're:^http:.*\.jpg$', }, }] From caf5a8817bc53e6d799c70c71d7ca03568738620 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 13:16:48 +0200 Subject: [PATCH 303/340] [chilloutzone] Fix test description --- youtube_dl/extractor/chilloutzone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py index 02d5ba527..a62395d4b 100644 --- a/youtube_dl/extractor/chilloutzone.py +++ b/youtube_dl/extractor/chilloutzone.py @@ -42,7 +42,7 @@ class ChilloutzoneIE(InfoExtractor): 'id': '85523671', 'ext': 'mp4', 'title': 'The Sunday Times - Icons', - 'description': 'md5:3e1c0dc6047498d6728dcdaad0891762', + 'description': 'md5:a5f7ff82e2f7a9ed77473fe666954e84', 'uploader': 'Us', 'uploader_id': 'usfilms', 'upload_date': '20140131' From ff1956e07b64735a37de886fa2e800dd823bb3e1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 13:19:41 +0200 Subject: [PATCH 304/340] [wdr] Replace test case --- youtube_dl/extractor/wdr.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index f741ba540..ab28ef6fe 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -55,14 +55,14 @@ class WDRIE(InfoExtractor): }, }, { - 'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html', - 'md5': '24e83813e832badb0a8d7d1ef9ef0691', + 'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html', + 'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa', 'info_dict': { - 'id': 'mdb-463528', + 'id': 'mdb-478135', 'ext': 'mp3', - 'title': 'Süpersong: Soul Bossa Nova', + 'title': 'Flavia Coelho: Amar é Amar', 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', - 'upload_date': '20140630', + 'upload_date': '20140717', }, }, ] From df8ba0d2cf9ea0ae1fde4c9f76a12f315e88aef3 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 13:22:14 +0200 Subject: [PATCH 305/340] [tagesschau] Remove test case See http://de.wikipedia.org/wiki/Depublizieren for the sad rationale. --- youtube_dl/extractor/tagesschau.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dl/extractor/tagesschau.py index 25b9864ad..b87047451 100644 --- a/youtube_dl/extractor/tagesschau.py +++ b/youtube_dl/extractor/tagesschau.py @@ -19,16 +19,6 @@ class TagesschauIE(InfoExtractor): 'description': 'md5:69da3c61275b426426d711bde96463ab', 'thumbnail': 're:^http:.*\.jpg$', }, - }, { - 'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html', - 'md5': '66652566900963a3f962333579eeffcf', - 'info_dict': { - 'id': '5964', - 'ext': 'mp4', - 'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland', - 'description': 'md5:07bfc78c48eec3145ed4805299a1900a', - 'thumbnail': 're:http://.*\.jpg', - }, }] _FORMATS = { From 06c155420fda2a922a7219dd6758f42b868e6d96 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 13:25:59 +0200 Subject: [PATCH 306/340] [sockshare] Simplify (#3268) --- youtube_dl/extractor/sockshare.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py index cbf2d7abe..75b634bc6 100644 --- a/youtube_dl/extractor/sockshare.py +++ b/youtube_dl/extractor/sockshare.py @@ -5,7 +5,6 @@ from ..utils import ( ExtractorError, compat_urllib_parse, compat_urllib_request, - determine_ext, ) import re @@ -34,7 +33,7 @@ class SockshareIE(InfoExtractor): webpage = self._download_webpage(url, video_id) if re.search(self._FILE_DELETED_REGEX, webpage) is not None: - raise ExtractorError(u'Video %s does not exist' % video_id, + raise ExtractorError('Video %s does not exist' % video_id, expected=True) confirm_hash = self._html_search_regex(r'''(?x)<input\s+ @@ -54,19 +53,21 @@ class SockshareIE(InfoExtractor): req.add_header('Host', 'www.sockshare.com') req.add_header('Content-type', 'application/x-www-form-urlencoded') - webpage = self._download_webpage(req, video_id, 'Downloading video page') + webpage = self._download_webpage( + req, video_id, 'Downloading video page') - video_url = self._html_search_regex(r'<a href="([^"]*)".+class="download_file_link"', webpage, 'file url') + video_url = self._html_search_regex( + r'<a href="([^"]*)".+class="download_file_link"', + webpage, 'file url') video_url = "http://www.sockshare.com" + video_url title = self._html_search_regex(r'<h1>(.+)<strong>', webpage, 'title') - thumbnail = self._html_search_regex(r'<img\ssrc="([^"]*)".+name="bg"', - webpage, 'thumbnail') - ext = determine_ext(title) + thumbnail = self._html_search_regex( + r'<img\s+src="([^"]*)".+?name="bg"', + webpage, 'thumbnail') formats = [{ 'format_id': 'sd', 'url': video_url, - 'ext': ext, }] return { From f1f725c6a0e567283704046fc21614f4826e77fd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 13:55:47 +0200 Subject: [PATCH 307/340] [dropbox] Fix title encoding on Python 2 --- youtube_dl/extractor/dropbox.py | 4 ++-- youtube_dl/utils.py | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py index 1711f0263..9f569aa93 100644 --- a/youtube_dl/extractor/dropbox.py +++ b/youtube_dl/extractor/dropbox.py @@ -5,7 +5,7 @@ import os.path import re from .common import InfoExtractor -from ..utils import compat_urllib_parse +from ..utils import compat_urllib_parse_unquote class DropboxIE(InfoExtractor): @@ -23,7 +23,7 @@ class DropboxIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - fn = compat_urllib_parse.unquote(mobj.group('title')) + fn = compat_urllib_parse_unquote(mobj.group('title')) title = os.path.splitext(fn)[0] video_url = url + '?dl=1' diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index bf4d1112f..3ecd798d7 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,11 +91,9 @@ except ImportError: compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') try: - from urllib.parse import parse_qs as compat_parse_qs -except ImportError: # Python 2 - # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. - # Python 2's version is apparently totally broken - def _unquote(string, encoding='utf-8', errors='replace'): + from urllib.parse import unquote as compat_urllib_parse_unquote +except ImportError: + def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): if string == '': return string res = string.split('%') @@ -130,6 +128,13 @@ except ImportError: # Python 2 string += pct_sequence.decode(encoding, errors) return string + +try: + from urllib.parse import parse_qs as compat_parse_qs +except ImportError: # Python 2 + # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. + # Python 2's version is apparently totally broken + def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace'): qs, _coerce_result = qs, unicode @@ -149,10 +154,12 @@ except ImportError: # Python 2 continue if len(nv[1]) or keep_blank_values: name = nv[0].replace('+', ' ') - name = _unquote(name, encoding=encoding, errors=errors) + name = compat_urllib_parse_unquote( + name, encoding=encoding, errors=errors) name = _coerce_result(name) value = nv[1].replace('+', ' ') - value = _unquote(value, encoding=encoding, errors=errors) + value = compat_urllib_parse_unquote( + value, encoding=encoding, errors=errors) value = _coerce_result(value) r.append((name, value)) return r From 754d8a035e3e1d0f3340aef662ae0df76a0be91d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 18:06:21 +0200 Subject: [PATCH 308/340] [nbcnews] Look in all playlists for video --- youtube_dl/extractor/nbc.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index aa34665d1..70aa98aee 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -85,11 +85,25 @@ class NBCNewsIE(InfoExtractor): flags=re.MULTILINE) bootstrap = json.loads(bootstrap_json) info = bootstrap['results'][0]['video'] - playlist_url = info['fallbackPlaylistUrl'] + '?form=MPXNBCNewsAPI' mpxid = info['mpxId'] - all_videos = self._download_json(playlist_url, title)['videos'] - # The response contains additional videos - info = next(v for v in all_videos if v['mpxId'] == mpxid) + + base_urls = [ + info['fallbackPlaylistUrl'], + info['associatedPlaylistUrl'], + ] + + for base_url in base_urls: + playlist_url = base_url + '?form=MPXNBCNewsAPI' + all_videos = self._download_json(playlist_url, title)['videos'] + + try: + info = next(v for v in all_videos if v['mpxId'] == mpxid) + break + except StopIteration: + continue + + if info is None: + raise ExtractorError('Could not find video in playlists') return { '_type': 'url', From 1e8ac8364b09259b5bfa277304f5fb31906b8801 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Mon, 21 Jul 2014 18:06:51 +0200 Subject: [PATCH 309/340] release 2014.07.21 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e2e0ee25c..0ce4a6c10 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.20.2' +__version__ = '2014.07.21' From 9dcb8f3fc7927a6a3e6f4747f64c6f8c3900cdc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Mon, 21 Jul 2014 20:42:20 +0200 Subject: [PATCH 310/340] [br] Allow '_' in the url (fixes #3311) --- youtube_dl/extractor/br.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py index 993360714..f7f2f713a 100644 --- a/youtube_dl/extractor/br.py +++ b/youtube_dl/extractor/br.py @@ -12,7 +12,7 @@ from ..utils import ( class BRIE(InfoExtractor): IE_DESC = 'Bayerischer Rundfunk Mediathek' - _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-]+/)+(?P<id>[a-z0-9\-]+)\.html' + _VALID_URL = r'https?://(?:www\.)?br\.de/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html' _BASE_URL = 'http://www.br.de' _TESTS = [ From 53eb217661ee95fc14d3f801118b2d69070d0bbe Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 22 Jul 2014 04:53:06 +0200 Subject: [PATCH 311/340] Add another great example for the --extractor-descriptions output --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 0e7b9ddaf..c6a5b2b5b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -633,7 +633,7 @@ def _real_main(argv=None): if desc is False: continue if hasattr(ie, 'SEARCH_KEY'): - _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise') + _SEARCHES = (u'cute kittens', u'slithering pythons', u'falling cat', u'angry poodle', u'purple fish', u'running tortoise', u'sleeping bunny') _COUNTS = (u'', u'5', u'10', u'all') desc += u' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) compat_print(desc) From 8904e979dfe489e37bda369a5863bceecb56d490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Tue, 22 Jul 2014 20:37:33 +0700 Subject: [PATCH 312/340] [vodlocker] Fix _VALID_URL --- youtube_dl/extractor/vodlocker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index 68c59364b..6d3b78749 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -10,7 +10,7 @@ from ..utils import ( class VodlockerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' + _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' _TESTS = [{ 'url': 'http://vodlocker.com/e8wvyzz4sl42', From e00fc35dbe7cdba20d78ccbf7a2fb471d5356529 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister <phihag@phihag.de> Date: Tue, 22 Jul 2014 15:52:01 +0200 Subject: [PATCH 313/340] [kickstarter] Support embedded videos (Fixes #3322) --- youtube_dl/extractor/kickstarter.py | 37 +++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index 961dd1aa6..56a76380c 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -8,7 +8,7 @@ from .common import InfoExtractor class KickStarterIE(InfoExtractor): _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*' - _TEST = { + _TESTS = [{ 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant?ref=home_location', 'md5': 'c81addca81327ffa66c642b5d8b08cab', 'info_dict': { @@ -18,22 +18,45 @@ class KickStarterIE(InfoExtractor): 'description': 'A unique motocross documentary that examines the ' 'life and mind of one of sports most elite athletes: Josh Grant.', }, - } + }, { + 'note': 'Embedded video (not using the native kickstarter video service)', + 'url': 'https://www.kickstarter.com/projects/597507018/pebble-e-paper-watch-for-iphone-and-android/posts/659178', + 'playlist': [ + { + 'info_dict': { + 'id': '78704821', + 'ext': 'mp4', + 'uploader_id': 'pebble', + 'uploader': 'Pebble Technology', + 'title': 'Pebble iOS Notifications', + } + } + ], + }] def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('id') webpage = self._download_webpage(url, video_id) - video_url = self._search_regex(r'data-video-url="(.*?)"', - webpage, 'video URL') - video_title = self._html_search_regex(r'<title>(.*?)', - webpage, 'title').rpartition('— Kickstarter')[0].strip() + title = self._html_search_regex( + r'\s*(.*?)(?:\s*— Kickstarter)?\s*', + webpage, 'title') + video_url = self._search_regex( + r'data-video-url="(.*?)"', + webpage, 'video URL', default=None) + if video_url is None: # No native kickstarter, look for embedded videos + return { + '_type': 'url_transparent', + 'ie_key': 'Generic', + 'url': url, + 'title': title, + } return { 'id': video_id, 'url': video_url, - 'title': video_title, + 'title': title, 'description': self._og_search_description(webpage), 'thumbnail': self._og_search_thumbnail(webpage), } From 1771ddd85db7acda1e4174ccd186acd40a881fbc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 22 Jul 2014 16:59:40 +0200 Subject: [PATCH 314/340] release 2014.07.22 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 0ce4a6c10..e5fcec839 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.21' +__version__ = '2014.07.22' From 2871d489a91b6de1a5849243e4d827123dd564ef Mon Sep 17 00:00:00 2001 From: Jason Terk Date: Tue, 22 Jul 2014 07:56:42 -0700 Subject: [PATCH 315/340] Support Alternative cbs.com URL Format Adds support for cbs.com URLs containing "/artist" instead of "/video". E.g.: http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/ --- youtube_dl/extractor/cbs.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index ac0315853..44d23aef6 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -4,9 +4,9 @@ from .common import InfoExtractor class CBSIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/video/(?P[^/]+)/.*' + _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(video|artist)/(?P[^/]+)/.*' - _TEST = { + _TESTS = [{ u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', u'file': u'4JUVEwq3wUT7.flv', u'info_dict': { @@ -18,7 +18,19 @@ class CBSIE(InfoExtractor): # rtmp download u'skip_download': True, }, - } + }, { + u'url': u'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', + u'file': u'P9gjWjelt6iP.flv', + u'info_dict': { + u'title': u'Live on Letterman - St. Vincent', + u'description': u'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', + u'duration': 3221, + }, + u'params': { + # rtmp download + u'skip_download': True, + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) From e42a692f003eabdb1efad7b9f4b10ce97c712d32 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 22 Jul 2014 17:34:34 +0200 Subject: [PATCH 316/340] [cbs] Modernize Also add threatening skip blocks in there - access is only possible from the US. We may want to find a better geolocation restriction method for tests. --- youtube_dl/extractor/cbs.py | 42 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 44d23aef6..822f9a7be 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -1,35 +1,41 @@ +from __future__ import unicode_literals + import re from .common import InfoExtractor class CBSIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(video|artist)/(?P[^/]+)/.*' + _VALID_URL = r'https?://(?:www\.)?cbs\.com/shows/[^/]+/(?:video|artist)/(?P[^/]+)/.*' _TESTS = [{ - u'url': u'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', - u'file': u'4JUVEwq3wUT7.flv', - u'info_dict': { - u'title': u'Connect Chat feat. Garth Brooks', - u'description': u'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', - u'duration': 1495, + 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', + 'info_dict': { + 'id': '4JUVEwq3wUT7', + 'ext': 'flv', + 'title': 'Connect Chat feat. Garth Brooks', + 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', + 'duration': 1495, }, - u'params': { + 'params': { # rtmp download - u'skip_download': True, + 'skip_download': True, }, + '_skip': 'Blocked outside the US', }, { - u'url': u'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', - u'file': u'P9gjWjelt6iP.flv', - u'info_dict': { - u'title': u'Live on Letterman - St. Vincent', - u'description': u'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', - u'duration': 3221, + 'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', + 'info_dict': { + 'id': 'P9gjWjelt6iP', + 'ext': 'flv', + 'title': 'Live on Letterman - St. Vincent', + 'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', + 'duration': 3221, }, - u'params': { + 'params': { # rtmp download - u'skip_download': True, + 'skip_download': True, }, + '_skip': 'Blocked outside the US', }] def _real_extract(self, url): @@ -38,5 +44,5 @@ class CBSIE(InfoExtractor): webpage = self._download_webpage(url, video_id) real_id = self._search_regex( r"video\.settings\.pid\s*=\s*'([^']+)';", - webpage, u'real video ID') + webpage, 'real video ID') return self.url_result(u'theplatform:%s' % real_id) From 07cc63f386c6afe253b7707631663072d2fb8789 Mon Sep 17 00:00:00 2001 From: Charles Chen Date: Tue, 22 Jul 2014 14:10:27 -0700 Subject: [PATCH 317/340] [MLB] Enhanced _VALID_URL to cover more MLB videos --- youtube_dl/extractor/mlb.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 18ab2c135..84f500821 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -11,8 +11,22 @@ from ..utils import ( class MLBIE(InfoExtractor): - _VALID_URL = r'http?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?Pn?\d+)' + _VALID_URL = r'http?://m\.mlb\.com/.*video/(?:topic/[\da-z_-]+/)?v(?Pn?\d+)' _TESTS = [ + { + 'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea', + 'md5': 'ff56a598c2cf411a9a38a69709e97079', + 'info_dict': { + 'id': '34698933', + 'ext': 'mp4', + 'title': "Ackley's spectacular catch", + 'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0', + 'duration': 66, + 'timestamp': 1405980600, + 'upload_date': '20140721', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, { 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', 'md5': 'd9c022c10d21f849f49c05ae12a8a7e9', From 38e292b112b82c9d5559b7c49e2e1c99b9e97dba Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Tue, 22 Jul 2014 23:54:05 +0200 Subject: [PATCH 318/340] [mlb] Fix regex --- youtube_dl/extractor/mlb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py index 84f500821..37c72bc53 100644 --- a/youtube_dl/extractor/mlb.py +++ b/youtube_dl/extractor/mlb.py @@ -11,7 +11,7 @@ from ..utils import ( class MLBIE(InfoExtractor): - _VALID_URL = r'http?://m\.mlb\.com/.*video/(?:topic/[\da-z_-]+/)?v(?Pn?\d+)' + _VALID_URL = r'https?://m\.mlb\.com/(?:.*?/)?video/(?:topic/[\da-z_-]+/)?v(?Pn?\d+)' _TESTS = [ { 'url': 'http://m.mlb.com/sea/video/topic/51231442/v34698933/nymsea-ackley-robs-a-home-run-with-an-amazing-catch/?c_id=sea', From 1a2ecbfbc4fa2b34ec864126c2ad7ed809604048 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:18:27 +0200 Subject: [PATCH 319/340] [vube] Add support for new data format (Fixes #3325) --- youtube_dl/extractor/vube.py | 51 ++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index 7b77865cb..c1ab76465 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import json import re from .common import InfoExtractor @@ -20,9 +21,8 @@ class VubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'Chiara Grispo - Price Tag by Jessie J', 'description': 'md5:8ea652a1f36818352428cb5134933313', - 'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f.jpg', + 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102e7e63057-5ebc-4f5c-4065-6ce4ebde131f\.jpg$', 'uploader': 'Chiara.Grispo', - 'uploader_id': '1u3hX0znhP', 'timestamp': 1388743358, 'upload_date': '20140103', 'duration': 170.56 @@ -36,13 +36,27 @@ class VubeIE(InfoExtractor): 'ext': 'mp4', 'title': 'My 7 year old Sister and I singing "Alive" by Krewella', 'description': 'md5:40bcacb97796339f1690642c21d56f4a', - 'thumbnail': 'http://frame.thestaticvube.com/snap/228x128/102265d5a9f-0f17-4f6b-5753-adf08484ee1e.jpg', + 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/102265d5a9f-0f17-4f6b-5753-adf08484ee1e\.jpg$', 'uploader': 'Seraina', - 'uploader_id': 'XU9VE2BQ2q', 'timestamp': 1396492438, 'upload_date': '20140403', 'duration': 240.107 } + }, { + 'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s', + 'md5': '0584fc13b50f887127d9d1007589d27f', + 'info_dict': { + 'id': '0nmsMY5vEq', + 'ext': 'mp4', + 'title': 'Frozen - Let It Go Cover by Siren Gene', + 'description': 'My rendition of "Let It Go" originally sung by Idina Menzel.', + 'uploader': 'Siren Gene', + 'uploader_id': 'Siren', + 'thumbnail': 're:^http://frame\.thestaticvube\.com/snap/[0-9x]+/10283ab622a-86c9-4681-51f2-30d1f65774af\.jpg$', + 'duration': 221.788, + 'like_count': int, + 'dislike_count': int, + } } ] @@ -50,8 +64,17 @@ class VubeIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - video = self._download_json( - 'http://vube.com/api/v2/video/%s' % video_id, video_id, 'Downloading video JSON') + webpage = self._download_webpage(url, video_id) + data_json = self._search_regex( + r'(?s)window\["(?:tapiVideoData|vubeOriginalVideoData)"\]\s*=\s*(\{.*?\n});\n', + webpage, 'video data' + ) + data = json.loads(data_json) + open('/dev/shm/f', 'w').write(json.dumps(data, indent=2)) + video = ( + data.get('video') or + data) + assert isinstance(video, dict) public_id = video['public_id'] @@ -69,16 +92,16 @@ class VubeIE(InfoExtractor): title = video['title'] description = video.get('description') - thumbnail = video['thumbnail_src'] - if thumbnail.startswith('//'): - thumbnail = 'http:' + thumbnail - uploader = video['user_alias'] - uploader_id = video['user_url_id'] - timestamp = int(video['upload_time']) + thumbnail = self._proto_relative_url( + video.get('thumbnail') or video.get('thumbnail_src'), + scheme='http:') + uploader = data.get('user', {}).get('channel', {}).get('name') or video.get('user_alias') + uploader_id = data.get('user', {}).get('name') + timestamp = int_or_none(video.get('upload_time')) duration = video['duration'] view_count = video.get('raw_view_count') - like_count = video.get('total_likes') - dislike_count= video.get('total_hates') + like_count = video.get('rlikes') + dislike_count = video.get('rhates') comment = self._download_json( 'http://vube.com/api/video/%s/comment' % video_id, video_id, 'Downloading video comment JSON') From 388841f8195bd79f242dc5beec8918759903bfae Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:18:42 +0200 Subject: [PATCH 320/340] release 2014.07.23 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index e5fcec839..c374657b4 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.22' +__version__ = '2014.07.23' From b090af592277260192689b718721b2192826de23 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:27:25 +0200 Subject: [PATCH 321/340] [vube] Fix comment count --- youtube_dl/extractor/common.py | 8 ++++++-- youtube_dl/extractor/vube.py | 30 +++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9b36e0789..88f12797c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -301,8 +301,12 @@ class InfoExtractor(object): def _download_json(self, url_or_request, video_id, note=u'Downloading JSON metadata', errnote=u'Unable to download JSON metadata', - transform_source=None): - json_string = self._download_webpage(url_or_request, video_id, note, errnote) + transform_source=None, + fatal=True): + json_string = self._download_webpage( + url_or_request, video_id, note, errnote, fatal=fatal) + if (not fatal) and json_string is False: + return None if transform_source: json_string = transform_source(json_string) try: diff --git a/youtube_dl/extractor/vube.py b/youtube_dl/extractor/vube.py index c1ab76465..f1b9e9a19 100644 --- a/youtube_dl/extractor/vube.py +++ b/youtube_dl/extractor/vube.py @@ -25,7 +25,10 @@ class VubeIE(InfoExtractor): 'uploader': 'Chiara.Grispo', 'timestamp': 1388743358, 'upload_date': '20140103', - 'duration': 170.56 + 'duration': 170.56, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, } }, { @@ -40,7 +43,10 @@ class VubeIE(InfoExtractor): 'uploader': 'Seraina', 'timestamp': 1396492438, 'upload_date': '20140403', - 'duration': 240.107 + 'duration': 240.107, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, } }, { 'url': 'http://vube.com/vote/Siren+Gene/0nmsMY5vEq?n=2&t=s', @@ -56,6 +62,7 @@ class VubeIE(InfoExtractor): 'duration': 221.788, 'like_count': int, 'dislike_count': int, + 'comment_count': int, } } ] @@ -70,7 +77,6 @@ class VubeIE(InfoExtractor): webpage, 'video data' ) data = json.loads(data_json) - open('/dev/shm/f', 'w').write(json.dumps(data, indent=2)) video = ( data.get('video') or data) @@ -101,12 +107,22 @@ class VubeIE(InfoExtractor): duration = video['duration'] view_count = video.get('raw_view_count') like_count = video.get('rlikes') + if like_count is None: + like_count = video.get('total_likes') dislike_count = video.get('rhates') + if dislike_count is None: + dislike_count = video.get('total_hates') - comment = self._download_json( - 'http://vube.com/api/video/%s/comment' % video_id, video_id, 'Downloading video comment JSON') - - comment_count = int_or_none(comment.get('total')) + comments = video.get('comments') + comment_count = None + if comments is None: + comment_data = self._download_json( + 'http://vube.com/api/video/%s/comment' % video_id, + video_id, 'Downloading video comment JSON', fatal=False) + if comment_data is not None: + comment_count = int_or_none(comment_data.get('total')) + else: + comment_count = len(comments) return { 'id': video_id, From a4e5af11844fc6d5f737e45a8a94067ccb7d5bc1 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:27:33 +0200 Subject: [PATCH 322/340] release 2014.07.23.1 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index c374657b4..5c61750d0 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.23' +__version__ = '2014.07.23.1' From 41c0849429649bd09381ae0db0535c300a5f4af0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:38:07 +0200 Subject: [PATCH 323/340] [savefrom] Make test description more flexible --- youtube_dl/extractor/savefrom.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dl/extractor/savefrom.py index ccd545971..5b7367b94 100644 --- a/youtube_dl/extractor/savefrom.py +++ b/youtube_dl/extractor/savefrom.py @@ -20,7 +20,7 @@ class SaveFromIE(InfoExtractor): 'upload_date': '20120816', 'uploader': 'Howcast', 'uploader_id': 'Howcast', - 'description': 'md5:727900f130df3dc9a25e2721497c7910', + 'description': 're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*', }, 'params': { 'skip_download': True From 798a2cad4f40a7181a441ceecdf7e8e3694cd2fd Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:40:01 +0200 Subject: [PATCH 324/340] [sockshare] Fix ext --- youtube_dl/extractor/sockshare.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/sockshare.py b/youtube_dl/extractor/sockshare.py index 75b634bc6..dc9f80550 100644 --- a/youtube_dl/extractor/sockshare.py +++ b/youtube_dl/extractor/sockshare.py @@ -5,6 +5,7 @@ from ..utils import ( ExtractorError, compat_urllib_parse, compat_urllib_request, + determine_ext, ) import re @@ -68,6 +69,7 @@ class SockshareIE(InfoExtractor): formats = [{ 'format_id': 'sd', 'url': video_url, + 'ext': determine_ext(title), }] return { From eae12e3fe3f7288d9133b85d00e694575f6674fc Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:41:44 +0200 Subject: [PATCH 325/340] [soundcloud] Adapt test --- youtube_dl/extractor/soundcloud.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 8a77c1370..097d0e418 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -82,10 +82,10 @@ class SoundcloudIE(InfoExtractor): # downloadable song { 'url': 'https://soundcloud.com/oddsamples/bus-brakes', - 'md5': 'fee7b8747b09bb755cefd4b853e7249a', + 'md5': '7624f2351f8a3b2e7cd51522496e7631', 'info_dict': { 'id': '128590877', - 'ext': 'wav', + 'ext': 'mp3', 'title': 'Bus Brakes', 'description': 'md5:0170be75dd395c96025d210d261c784e', 'uploader': 'oddsamples', From 0fd7fd71b4570c274452a59e570cb5b484a0be2f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:43:46 +0200 Subject: [PATCH 326/340] [test/helper] Do not use deprecated method --- test/helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/helper.py b/test/helper.py index 84b16f770..b7299fb82 100644 --- a/test/helper.py +++ b/test/helper.py @@ -137,8 +137,8 @@ def expect_info_dict(self, expected_dict, got_dict): def assertRegexpMatches(self, text, regexp, msg=None): - if hasattr(self, 'assertRegexpMatches'): - return self.assertRegexpMatches(text, regexp, msg) + if hasattr(self, 'assertRegexp'): + return self.assertRegexp(text, regexp, msg) else: m = re.match(regexp, text) if not m: From d82ba23ba52e217c22df9ff20bf176dc6c72879a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:43:59 +0200 Subject: [PATCH 327/340] [soundcloud:playlist] Fix test description --- test/test_playlists.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_playlists.py b/test/test_playlists.py index 4789200e9..c221c47b9 100644 --- a/test/test_playlists.py +++ b/test/test_playlists.py @@ -154,7 +154,7 @@ class TestPlaylists(unittest.TestCase): self.assertEqual(result['id'], '4110309') self.assertEqual(result['title'], 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]') assertRegexpMatches( - self, result['description'], r'TILT Brass - Bowery Poetry Club') + self, result['description'], r'.*?TILT Brass - Bowery Poetry Club') self.assertEqual(len(result['entries']), 6) def test_livestream_event(self): From 37e64addc8412d0b8c47ec35be4998cab99a7df4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:47:18 +0200 Subject: [PATCH 328/340] [nbc] Add missing import --- youtube_dl/extractor/nbc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index 70aa98aee..d2e4acbad 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -4,7 +4,11 @@ import re import json from .common import InfoExtractor -from ..utils import find_xpath_attr, compat_str +from ..utils import ( + compat_str, + ExtractorError, + find_xpath_attr, +) class NBCIE(InfoExtractor): From 94e8df3a7e5f73109556392df3afd93f4e6f3566 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 01:47:36 +0200 Subject: [PATCH 329/340] [wdr] Fix umlaut parsing on Python 2.x --- youtube_dl/extractor/wdr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index ab28ef6fe..b8ad75e7d 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( compat_parse_qs, - compat_urlparse, + compat_parse_qs, determine_ext, unified_strdate, ) @@ -81,7 +81,7 @@ class WDRIE(InfoExtractor): ] return self.playlist_result(entries, page_id) - flashvars = compat_urlparse.parse_qs( + flashvars = compat_parse_qs( self._html_search_regex(r' Date: Wed, 23 Jul 2014 01:49:25 +0200 Subject: [PATCH 330/340] [rtlnow] Simplify outdated test --- youtube_dl/extractor/rtlnow.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 4835ec5ec..a45884b25 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -92,16 +92,7 @@ class RTLnowIE(InfoExtractor): }, { 'url': 'http://www.n-tvnow.de/deluxe-alles-was-spass-macht/thema-ua-luxushotel-fuer-vierbeiner.php?container_id=153819&player=1&season=0', - 'info_dict': { - 'id': '153819', - 'ext': 'flv', - 'title': 'Deluxe - Alles was Spaß macht - Thema u.a.: Luxushotel für Vierbeiner', - 'description': 'md5:c3705e1bb32e1a5b2bcd634fc065c631', - 'thumbnail': 'http://autoimg.static-fra.de/ntvnow/383157/1500x1500/image2.jpg', - 'upload_date': '20140221', - 'duration': 2429, - }, - 'skip': 'Only works from Germany', + 'only_matching': True, }, ] From 9f43890bcd6bf6c0bf75e0ec748d29dc9b01dab5 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:13:48 +0200 Subject: [PATCH 331/340] [jsinterp] Allow digits in function names --- test/test_youtube_signature.py | 6 ++++++ youtube_dl/jsinterp.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index e8a67c4c0..f0f33f1db 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -57,6 +57,12 @@ _TESTS = [ u'F375F75BF2AFDAAF2666E43868D46816F83F13E81C46.3725A8218E446A0DECD33F79DC282994D6AA92C92C9', u'9C29AA6D499282CD97F33DCED0A644E8128A5273.64C18E31F38361864D86834E6662FAADFA2FB57F' ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', + u'js', + 84, + u'123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>' + ) ] diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index ae5bca2e6..13ad5ba1a 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -114,13 +114,13 @@ class JSInterpreter(object): obj = {} obj_m = re.search( (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + - r'\s*(?P([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + + r'\s*(?P([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + r'\}\s*;', self.code) fields = obj_m.group('fields') # Currently, it only supports function definitions fields_m = re.finditer( - r'(?P[a-zA-Z$]+)\s*:\s*function' + r'(?P[a-zA-Z$0-9]+)\s*:\s*function' r'\((?P[a-z,]+)\){(?P[^}]+)}', fields) for f in fields_m: From c081b35c27b8e2f1735c62933709448c1a675f72 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:19:33 +0200 Subject: [PATCH 332/340] [youtube] Support new player URLs (Fixes #3326) --- youtube_dl/extractor/youtube.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 072e711c2..a346f4c96 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -346,8 +346,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): def _extract_signature_function(self, video_id, player_url, slen): id_m = re.match( - r'.*-(?P[a-zA-Z0-9_-]+)(?:/watch_as3)?\.(?P[a-z]+)$', + r'.*-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P[a-z]+)$', player_url) + if not id_m: + raise ExtractorError('Cannot identify player %r' % player_url) player_type = id_m.group('ext') player_id = id_m.group('id') From b0472057a3977c6d23cb39ae645fcec17ea0f39b Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:24:50 +0200 Subject: [PATCH 333/340] [YoutubeDL] Make sure we really, really get out the encoding string Fixes #3326 Apparently, on some platforms, even outputting this fails already. --- youtube_dl/YoutubeDL.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 686988fe5..f5ca33d45 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1234,14 +1234,21 @@ class YoutubeDL(object): if not self.params.get('verbose'): return - write_string( + encoding_str = ( '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % ( locale.getpreferredencoding(), sys.getfilesystemencoding(), sys.stdout.encoding, - self.get_encoding()), - encoding=None - ) + self.get_encoding())) + try: + write_string(encoding_str, encoding=None) + except: + errmsg = 'Failed to write encoding string %r' % encoding_str + try: + sys.stdout.write(errmsg) + except: + pass + raise IOError(errmsg) self._write_string('[debug] youtube-dl version ' + __version__ + '\n') try: From dfe029a62c09677d1107907329aee5c2afddd961 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:25:27 +0200 Subject: [PATCH 334/340] release 2014.07.23.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5c61750d0..dca400d5e 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.23.1' +__version__ = '2014.07.23.2' From 92a86f4c1a539180664e1985926e8a751dde788e Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:43:59 +0200 Subject: [PATCH 335/340] Do not import from legacy FileDownloader class --- youtube_dl/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index c6a5b2b5b..de7bc0f5f 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -97,7 +97,7 @@ from .utils import ( write_string, ) from .update import update_self -from .FileDownloader import ( +from .downloader import ( FileDownloader, ) from .extractor import gen_extractors From becafcbf0f46671b7286758118a48a6602083eda Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:44:30 +0200 Subject: [PATCH 336/340] [wdr] fix up imports --- youtube_dl/extractor/wdr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index b8ad75e7d..54d37da61 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -6,7 +6,7 @@ import re from .common import InfoExtractor from ..utils import ( compat_parse_qs, - compat_parse_qs, + compat_urlparse, determine_ext, unified_strdate, ) From 0c92b57398417529ad22e80df683a2fb4adac4da Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:46:21 +0200 Subject: [PATCH 337/340] Remove unused imports --- test/test_all_urls.py | 1 - test/test_download.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 2bc81f020..0ff47cf1e 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -15,7 +15,6 @@ from youtube_dl.extractor import ( FacebookIE, gen_extractors, JustinTVIE, - PBSIE, YoutubeIE, ) diff --git a/test/test_download.py b/test/test_download.py index f171c10ba..d6540588c 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -10,7 +10,6 @@ from test.helper import ( get_params, gettestcases, expect_info_dict, - md5, try_rm, report_warning, ) @@ -24,7 +23,6 @@ import socket import youtube_dl.YoutubeDL from youtube_dl.utils import ( compat_http_client, - compat_str, compat_urllib_error, compat_HTTPError, DownloadError, From 6db274e057873feb256568f0e27c9a03a2b8d16c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:47:52 +0200 Subject: [PATCH 338/340] Remove legacy FileDownloader (Closes #2964) --- youtube_dl/FileDownloader.py | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 youtube_dl/FileDownloader.py diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py deleted file mode 100644 index 5c8e676a2..000000000 --- a/youtube_dl/FileDownloader.py +++ /dev/null @@ -1,12 +0,0 @@ -# Legacy file for backwards compatibility, use youtube_dl.downloader instead! -from .downloader import FileDownloader as RealFileDownloader -from .downloader import get_suitable_downloader - - -# This class reproduces the old behaviour of FileDownloader -class FileDownloader(RealFileDownloader): - def _do_download(self, filename, info_dict): - real_fd = get_suitable_downloader(info_dict)(self.ydl, self.params) - for ph in self._progress_hooks: - real_fd.add_progress_hook(ph) - return real_fd.download(filename, info_dict) From b7f8116406d7d7165b01c02b4f3a5dc63476a294 Mon Sep 17 00:00:00 2001 From: rupertbaxter2 Date: Wed, 23 Jul 2014 02:53:44 +0200 Subject: [PATCH 339/340] Deletes temp files after postprocess merge unless -k option is specified --- youtube_dl/YoutubeDL.py | 2 +- youtube_dl/postprocessor/ffmpeg.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f5ca33d45..4ff1ae0e8 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -999,7 +999,7 @@ class YoutubeDL(object): if info_dict.get('requested_formats') is not None: downloaded = [] success = True - merger = FFmpegMergerPP(self) + merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) if not merger._get_executable(): postprocessors = [] self.report_warning('You have requested multiple ' diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 45328ed43..ea9273259 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -23,9 +23,10 @@ class FFmpegPostProcessorError(PostProcessingError): pass class FFmpegPostProcessor(PostProcessor): - def __init__(self,downloader=None): + def __init__(self,downloader=None,deletetempfiles=False): PostProcessor.__init__(self, downloader) self._exes = self.detect_executables() + self._deletetempfiles = deletetempfiles @staticmethod def detect_executables(): @@ -60,6 +61,9 @@ class FFmpegPostProcessor(PostProcessor): stderr = stderr.decode('utf-8', 'replace') msg = stderr.strip().split('\n')[-1] raise FFmpegPostProcessorError(msg) + if self._deletetempfiles: + for rempath in input_paths: + os.remove(rempath) def run_ffmpeg(self, path, out_path, opts): self.run_ffmpeg_multiple_files([path], out_path, opts) From d799b47b82e7b0d310427bf26bfaacef1e544f7d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 23 Jul 2014 02:55:06 +0200 Subject: [PATCH 340/340] [ffmpeg] PEP8 and a more obvious variable name --- youtube_dl/postprocessor/ffmpeg.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index ea9273259..8c5f7c43b 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -18,12 +18,12 @@ from ..utils import ( ) - class FFmpegPostProcessorError(PostProcessingError): pass + class FFmpegPostProcessor(PostProcessor): - def __init__(self,downloader=None,deletetempfiles=False): + def __init__(self, downloader=None, deletetempfiles=False): PostProcessor.__init__(self, downloader) self._exes = self.detect_executables() self._deletetempfiles = deletetempfiles @@ -62,8 +62,8 @@ class FFmpegPostProcessor(PostProcessor): msg = stderr.strip().split('\n')[-1] raise FFmpegPostProcessorError(msg) if self._deletetempfiles: - for rempath in input_paths: - os.remove(rempath) + for ipath in input_paths: + os.remove(ipath) def run_ffmpeg(self, path, out_path, opts): self.run_ffmpeg_multiple_files([path], out_path, opts)