# coding: utf-8 from __future__ import unicode_literals import os import re import sys from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( compat_etree_fromstring, compat_urllib_parse_unquote, compat_urlparse, compat_xml_parse_error, ) from ..utils import ( determine_ext, ExtractorError, float_or_none, HEADRequest, is_html, js_to_json, orderedSet, sanitized_Request, smuggle_url, unescapeHTML, unified_strdate, unsmuggle_url, UnsupportedError, xpath_text, ) from .commonprotocols import RtmpIE from .arkena import ArkenaIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) from .condenast import CondeNastIE from .dailymotion import ( DailymotionIE, DailymotionCloudIE, ) from .dbtv import DBTVIE from .digiteka import DigitekaIE from .drtuber import DrTuberIE from .eagleplatform import EaglePlatformIE from .facebook import FacebookIE from .googledrive import GoogleDriveIE from .instagram import InstagramIE from .jwplatform import JWPlatformIE from .kaltura import KalturaIE from .liveleak import LiveLeakIE from .mtv import MTVServicesEmbeddedIE from .myvi import MyviIE from .nbc import NBCSportsVPlayerIE from .onionstudios import OnionStudiosIE from .ooyala import OoyalaIE from .openload import OpenloadIE from .piksel import PikselIE from .pladform import PladformIE from .pornhub import PornHubIE from .redtube import RedTubeIE from .rutube import RutubeIE from .rutv import RUTVIE from .senateisvp import SenateISVPIE from .smotri import SmotriIE from .soundcloud import SoundcloudIE from .sportbox import SportBoxEmbedIE from .svt import SVTIE from .theplatform import ThePlatformIE from .threeqsdn import ThreeQSDNIE from .tnaflix import TNAFlixNetworkEmbedIE from .tunein import TuneInBaseIE from .tvc import TVCIE from .twentymin import TwentyMinutenIE from .udn import UDNEmbedIE from .ustream import UstreamIE from .vbox7 import Vbox7IE from .vessel import VesselIE from .videa import VideaIE from .videomore import VideomoreIE from .videopress import VideoPressIE from .viewlift import ViewLiftEmbedIE from .vimeo import VimeoIE from .washingtonpost import WashingtonPostIE from .webcaster import WebcasterFeedIE from .xhamster import XHamsterEmbedIE class GenericIE(InfoExtractor): IE_DESC = 'Generic downloader that works on some sites' _VALID_URL = r'.*' IE_NAME = 'generic' _TESTS = [ # Direct link to a video { 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', 'md5': '67d406c2bcb6af27fa886f31aa934bbe', 'info_dict': { 'id': 'trailer', 'ext': 'mp4', 'title': 'trailer', 'upload_date': '20100513', } }, # Direct link to media delivered compressed (until Accept-Encoding is *) { 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac', 'md5': '128c42e68b13950268b648275386fc74', 'info_dict': { 'id': 'FictionJunction-Parallel_Hearts', 'ext': 'flac', 'title': 'FictionJunction-Parallel_Hearts', 'upload_date': '20140522', }, 'expected_warnings': [ 'URL could be a direct video link, returning it as such.' ], 'skip': 'URL invalid', }, # Direct download with broken HEAD { 'url': 'http://ai-radio.org:8000/radio.opus', 'info_dict': { 'id': 'radio', 'ext': 'opus', 'title': 'radio', }, 'params': { 'skip_download': True, # infinite live stream }, 'expected_warnings': [ r'501.*Not Implemented', r'400.*Bad Request', ], }, # Direct link with incorrect MIME type { 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', 'md5': '4ccbebe5f36706d85221f204d7eb5913', 'info_dict': { 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm', 'id': '5_Lennart_Poettering_-_Systemd', 'ext': 'webm', 'title': '5_Lennart_Poettering_-_Systemd', 'upload_date': '20141120', }, 'expected_warnings': [ 'URL could be a direct video link, returning it as such.' ] }, # RSS feed { 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', 'info_dict': { 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', 'title': 'Zero Punctuation', 'description': 're:.*groundbreaking video review series.*' }, 'playlist_mincount': 11, }, # RSS feed with enclosure { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', 'info_dict': { 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', 'ext': 'm4v', 'upload_date': '20150228', 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', } }, # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng { 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml', 'info_dict': { 'id': 'smil', 'ext': 'mp4', 'title': 'Automatics, robotics and biocybernetics', 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482', 'upload_date': '20130627', 'formats': 'mincount:16', 'subtitles': 'mincount:1', }, 'params': { 'force_generic_extractor': True, 'skip_download': True, }, }, # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html { 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil', 'info_dict': { 'id': 'hds', 'ext': 'flv', 'title': 'hds', 'formats': 'mincount:1', }, 'params': { 'skip_download': True, }, }, # SMIL from https://www.restudy.dk/video/play/id/1637 { 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml', 'info_dict': { 'id': 'video_1637', 'ext': 'flv', 'title': 'video_1637', 'formats': 'mincount:3', }, 'params': { 'skip_download': True, }, }, # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm { 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil', 'info_dict': { 'id': 'smil-service', 'ext': 'flv', 'title': 'smil-service', 'formats': 'mincount:1', }, 'params': { 'skip_download': True, }, }, # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370 { 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil', 'info_dict': { 'id': '4719370', 'ext': 'mp4', 'title': '571de1fd-47bc-48db-abf9-238872a58d1f', 'formats': 'mincount:3', }, 'params': { 'skip_download': True, }, }, # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html { 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf', 'info_dict': { 'id': 'mZlp2ctYIUEB', 'ext': 'mp4', 'title': 'Tikibad ontruimd wegens brand', 'description': 'md5:05ca046ff47b931f9b04855015e163a4', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 33, }, 'params': { 'skip_download': True, }, }, # MPD from http://dash-mse-test.appspot.com/media.html { 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd', 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53', 'info_dict': { 'id': 'car-20120827-manifest', 'ext': 'mp4', 'title': 'car-20120827-manifest', 'formats': 'mincount:9', 'upload_date': '20130904', }, 'params': { 'format': 'bestvideo', }, }, # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8 { 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8', 'info_dict': { 'id': 'content', 'ext': 'mp4', 'title': 'content', 'formats': 'mincount:8', }, 'params': { # m3u8 downloads 'skip_download': True, }, 'skip': 'video gone', }, # m3u8 served with Content-Type: text/plain { 'url': 'http://www.nacentapps.com/m3u8/index.m3u8', 'info_dict': { 'id': 'index', 'ext': 'mp4', 'title': 'index', 'upload_date': '20140720', 'formats': 'mincount:11', }, 'params': { # m3u8 downloads 'skip_download': True, }, 'skip': 'video gone', }, # google redirect { 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', 'info_dict': { 'id': 'cmQHVoWB5FY', 'ext': 'mp4', 'upload_date': '20130224', 'uploader_id': 'TheVerge', 'description': r're:^Chris Ziegler takes a look at the\.*', 'uploader': 'The Verge', 'title': 'First Firefox OS phones side-by-side', }, 'params': { 'skip_download': False, } }, { # redirect in Refresh HTTP header 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1', 'info_dict': { 'id': 'pO8h3EaFRdo', 'ext': 'mp4', 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set', 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5', 'upload_date': '20150917', 'uploader_id': 'brtvofficial', 'uploader': 'Boiler Room', }, 'params': { 'skip_download': False, }, }, { 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html', 'md5': '85b90ccc9d73b4acd9138d3af4c27f89', 'info_dict': { 'id': '13601338388002', 'ext': 'mp4', 'uploader': 'www.hodiho.fr', 'title': 'R\u00e9gis plante sa Jeep', } }, # bandcamp page with custom domain { 'add_ie': ['Bandcamp'], 'url': 'http://bronyrock.com/track/the-pony-mash', 'info_dict': { 'id': '3235767654', 'ext': 'mp3', 'title': 'The Pony Mash', 'uploader': 'M_Pallante', }, 'skip': 'There is a limit of 200 free downloads / month for the test song', }, { # embedded brightcove video # it also tests brightcove videos that need to set the 'Referer' # in the http requests 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'info_dict': { 'id': '2765128793001', 'ext': 'mp4', 'title': 'Le cours de bourse : l’analyse technique', 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9', 'uploader': 'BFM BUSINESS', }, 'params': { 'skip_download': True, }, }, { # embedded with itemprop embedURL and video id spelled as `idVideo` 'add_id': ['BrightcoveLegacy'], 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/', 'info_dict': { 'id': '5255628253001', 'ext': 'mp4', 'title': 'md5:37c519b1128915607601e75a87995fc0', 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26', 'uploader': 'BFM BUSINESS', 'uploader_id': '876450612001', 'timestamp': 1482255315, 'upload_date': '20161220', }, 'params': { 'skip_download': True, }, }, { # https://github.com/rg3/youtube-dl/issues/2253 'url': 'http://bcove.me/i6nfkrc3', 'md5': '0ba9446db037002366bab3b3eb30c88c', 'info_dict': { 'id': '3101154703001', 'ext': 'mp4', 'title': 'Still no power', 'uploader': 'thestar.com', 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', }, 'add_ie': ['BrightcoveLegacy'], 'skip': 'video gone', }, { 'url': 'http://www.championat.com/video/football/v/87/87499.html', 'md5': 'fb973ecf6e4a78a67453647444222983', 'info_dict': { 'id': '3414141473001', 'ext': 'mp4', 'title': 'Видео. Удаление Дзагоева (ЦСКА)', 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"', 'uploader': 'Championat', }, }, { # https://github.com/rg3/youtube-dl/issues/3541 'add_ie': ['BrightcoveLegacy'], 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', 'info_dict': { 'id': '3866516442001', 'ext': 'mp4', 'title': 'Leer mij vrouwen kennen: Aflevering 1', 'description': 'Leer mij vrouwen kennen: Aflevering 1', 'uploader': 'SBS Broadcasting', }, 'skip': 'Restricted to Netherlands', 'params': { 'skip_download': True, # m3u8 download }, }, { # Brightcove with alternative playerID key 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html', 'info_dict': { 'id': 'nmeth.2062_SV1', 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research', }, 'playlist': [{ 'info_dict': { 'id': '2228375078001', 'ext': 'mp4', 'title': 'nmeth.2062-sv1', 'description': 'nmeth.2062-sv1', 'timestamp': 1363357591, 'upload_date': '20130315', 'uploader': 'Nature Publishing Group', 'uploader_id': '1964492299001', }, }], }, { # Brightcove with UUID in videoPlayer 'url': 'http://www8.hp.com/cn/zh/home.html', 'info_dict': { 'id': '5255815316001', 'ext': 'mp4', 'title': 'Sprocket Video - China', 'description': 'Sprocket Video - China', 'uploader': 'HP-Video Gallery', 'timestamp': 1482263210, 'upload_date': '20161220', 'uploader_id': '1107601872001', }, 'params': { 'skip_download': True, # m3u8 download }, }, # ooyala video { 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', 'md5': '166dd577b433b4d4ebfee10b0824d8ff', 'info_dict': { 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', 'ext': 'mp4', 'title': '2cc213299525360.mov', # that's what we get 'duration': 238.231, }, 'add_ie': ['Ooyala'], }, { # ooyala video embedded with http://player.ooyala.com/iframe.js 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/', 'info_dict': { 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB', 'ext': 'mp4', 'title': '"Steve Jobs: Man in the Machine" trailer', 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', 'duration': 135.427, }, 'params': { 'skip_download': True, }, 'skip': 'movie expired', }, # embed.ly video { 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/', 'info_dict': { 'id': '9ODmcdjQcHQ', 'ext': 'mp4', 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second', 'upload_date': '20140225', 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff', 'uploader': 'Tested', 'uploader_id': 'testedcom', }, # No need to test YoutubeIE here 'params': { 'skip_download': True, }, }, # funnyordie embed { 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns', 'info_dict': { 'id': '18e820ec3f', 'ext': 'mp4', 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama', 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.', }, # HEAD requests lead to endless 301, while GET is OK 'expected_warnings': ['301'], }, # RUTV embed { 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html', 'info_dict': { 'id': '776940', 'ext': 'mp4', 'title': 'Охотское море стало целиком российским', 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43', }, 'params': { # m3u8 download 'skip_download': True, }, }, # TVC embed { 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/', 'info_dict': { 'id': '55304', 'ext': 'mp4', 'title': 'Дошкольное воспитание', }, }, # SportBox embed { 'url': 'http://www.vestifinance.ru/articles/25753', 'info_dict': { 'id': '25753', 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"', }, 'playlist': [{ 'info_dict': { 'id': '370908', 'title': 'Госзаказ. День 3', 'ext': 'mp4', } }, { 'info_dict': { 'id': '370905', 'title': 'Госзаказ. День 2', 'ext': 'mp4', } }, { 'info_dict': { 'id': '370902', 'title': 'Госзаказ. День 1', 'ext': 'mp4', } }], 'params': { # m3u8 download 'skip_download': True, }, }, # Myvi.ru embed { 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1', 'info_dict': { 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e', 'ext': 'mp4', 'title': 'Ужастики, русский трейлер (2015)', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 153, } }, # XHamster embed { 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8', 'info_dict': { 'id': 'showthread', 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )', }, 'playlist_mincount': 7, # This forum does not allow