From 631e20d92a0812a41d3646d1784db28615cc5403 Mon Sep 17 00:00:00 2001
From: rigstot
Date: Fri, 5 Jul 2019 00:23:02 +0200
Subject: [PATCH] implement ThisVid extractor

Deobfuscates the video URL using a reverse-engineered version of the
KVS player's algorithm. This was tested against versions 4.0.4, 5.0.1
and 5.1.1.4 of the player; a warning will be issued if the major
version changes.
---
A short annotated sketch of the deobfuscation scheme and of the
player-version guard follows the file list below.

.github/ISSUE_TEMPLATE/1_broken_site.md | 63 + .../ISSUE_TEMPLATE/2_site_support_request.md | 54 + .../ISSUE_TEMPLATE/3_site_feature_request.md | 37 + .github/ISSUE_TEMPLATE/4_bug_report.md | 65 + .github/ISSUE_TEMPLATE/5_feature_request.md | 38 + .github/ISSUE_TEMPLATE/6_question.md | 38 + .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md | 63 + .../2_site_support_request.md | 54 + .../3_site_feature_request.md | 37 + .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md | 65 + .../ISSUE_TEMPLATE_tmpl/5_feature_request.md | 38 + .github/PULL_REQUEST_TEMPLATE.md | 28 + .gitignore | 53 + .travis.yml | 39 + AUTHORS | 248 + CONTRIBUTING.md | 368 ++ ChangeLog | 4470 +++++++++++++ LICENSE | 24 + MANIFEST.in | 9 + Makefile | 135 + README.md | 1378 ++++ bin/youtube-dl | 6 + devscripts/SizeOfImage.patch | Bin 0 -> 147 bytes devscripts/SizeOfImage_w.patch | Bin 0 -> 148 bytes devscripts/bash-completion.in | 29 + devscripts/bash-completion.py | 30 + devscripts/buildserver.py | 433 ++ devscripts/check-porn.py | 60 + devscripts/create-github-release.py | 120 + devscripts/fish-completion.in | 5 + devscripts/fish-completion.py | 49 + devscripts/generate_aes_testdata.py | 43 + devscripts/gh-pages/add-version.py | 43 + devscripts/gh-pages/generate-download.py | 22 + devscripts/gh-pages/sign-versions.py | 34 + devscripts/gh-pages/update-copyright.py | 21 + devscripts/gh-pages/update-feed.py | 76 + devscripts/gh-pages/update-sites.py | 37 + devscripts/install_jython.sh | 5 + devscripts/lazy_load_template.py | 19 + devscripts/make_contributing.py | 33 + devscripts/make_issue_template.py | 29 + devscripts/make_lazy_extractors.py | 100 + devscripts/make_readme.py | 26 + devscripts/make_supportedsites.py | 46 + devscripts/posix-locale.sh | 6 + devscripts/prepare_manpage.py | 79 + devscripts/release.sh | 141 + devscripts/run_tests.sh | 22 + devscripts/show-downloads-statistics.py | 47 + devscripts/wine-py2exe.sh | 56 + devscripts/zsh-completion.in | 28 + devscripts/zsh-completion.py | 49 + docs/.gitignore | 1 + docs/Makefile | 177 + docs/conf.py | 71 + docs/index.rst | 23 + docs/module_guide.rst | 67 + docs/supportedsites.md | 1169 ++++ setup.cfg | 6 + setup.py | 148 + test/__init__.py | 0 test/helper.py | 282 + test/parameters.json | 43 + test/swftests/.gitignore | 1 + test/swftests/ArrayAccess.as | 19 + test/swftests/ClassCall.as | 17 + test/swftests/ClassConstruction.as | 15 + test/swftests/ConstArrayAccess.as | 18 + test/swftests/ConstantInt.as | 12 + test/swftests/DictCall.as | 10 + test/swftests/EqualsOperator.as | 10 + test/swftests/LocalVars.as | 13 + test/swftests/MemberAssignment.as | 22 + test/swftests/NeOperator.as | 24 + test/swftests/PrivateCall.as | 21 + test/swftests/PrivateVoidCall.as | 22 + test/swftests/StaticAssignment.as | 13 + test/swftests/StaticRetrieval.as | 16 + test/swftests/StringBasics.as | 11 + test/swftests/StringCharCodeAt.as | 11 + test/swftests/StringConversion.as | 11 + test/test_InfoExtractor.py | 1071 ++++ test/test_YoutubeDL.py | 904 +++ test/test_YoutubeDLCookieJar.py | 44 + test/test_aes.py | 63 + test/test_age_restriction.py | 50 + test/test_all_urls.py | 143 + test/test_cache.py | 59 +
test/test_compat.py | 126 + test/test_download.py | 265 + test/test_downloader_http.py | 115 + test/test_execution.py | 44 + test/test_http.py | 166 + test/test_iqiyi_sdk_interpreter.py | 48 + test/test_jsinterp.py | 117 + test/test_netrc.py | 26 + test/test_options.py | 26 + test/test_postprocessors.py | 17 + test/test_socks.py | 118 + test/test_subtitles.py | 358 ++ test/test_swfinterp.py | 80 + test/test_unicode_literals.py | 63 + test/test_update.py | 30 + test/test_utils.py | 1410 +++++ test/test_verbose_output.py | 71 + test/test_write_annotations.py | 80 + test/test_youtube_chapters.py | 275 + test/test_youtube_lists.py | 71 + test/test_youtube_signature.py | 123 + test/testcert.pem | 52 + test/testdata/cookies/httponly_cookies.txt | 6 + test/testdata/cookies/session_cookies.txt | 6 + test/testdata/f4m/custom_base_url.f4m | 10 + test/testdata/m3u8/pluzz_francetv_11507.m3u8 | 14 + test/testdata/m3u8/teamcoco_11995.m3u8 | 16 + test/testdata/m3u8/ted_18923.m3u8 | 28 + test/testdata/m3u8/toggle_mobile_12211.m3u8 | 13 + test/testdata/m3u8/twitch_vod.m3u8 | 20 + test/testdata/m3u8/vidio.m3u8 | 10 + test/testdata/mpd/float_duration.mpd | 18 + test/testdata/mpd/unfragmented.mpd | 28 + test/testdata/mpd/urls_only.mpd | 218 + test/testdata/xspf/foo_xspf.xspf | 34 + test/versions.json | 34 + tox.ini | 14 + youtube-dl.plugin.zsh | 24 + youtube_dl/YoutubeDL.py | 2407 +++++++ youtube_dl/__init__.py | 483 ++ youtube_dl/__main__.py | 19 + youtube_dl/aes.py | 361 ++ youtube_dl/cache.py | 96 + youtube_dl/compat.py | 3026 +++++++++ youtube_dl/downloader/__init__.py | 61 + youtube_dl/downloader/common.py | 391 ++ youtube_dl/downloader/dash.py | 80 + youtube_dl/downloader/external.py | 370 ++ youtube_dl/downloader/f4m.py | 438 ++ youtube_dl/downloader/fragment.py | 268 + youtube_dl/downloader/hls.py | 210 + youtube_dl/downloader/http.py | 354 ++ youtube_dl/downloader/ism.py | 259 + youtube_dl/downloader/rtmp.py | 214 + youtube_dl/downloader/rtsp.py | 47 + youtube_dl/extractor/__init__.py | 46 + youtube_dl/extractor/abc.py | 193 + youtube_dl/extractor/abcnews.py | 145 + youtube_dl/extractor/abcotvs.py | 112 + youtube_dl/extractor/academicearth.py | 41 + youtube_dl/extractor/acast.py | 135 + youtube_dl/extractor/addanime.py | 95 + youtube_dl/extractor/adn.py | 207 + youtube_dl/extractor/adobeconnect.py | 37 + youtube_dl/extractor/adobepass.py | 1572 +++++ youtube_dl/extractor/adobetv.py | 197 + youtube_dl/extractor/adultswim.py | 202 + youtube_dl/extractor/aenetworks.py | 247 + youtube_dl/extractor/afreecatv.py | 367 ++ youtube_dl/extractor/airmozilla.py | 66 + youtube_dl/extractor/aliexpress.py | 53 + youtube_dl/extractor/aljazeera.py | 33 + youtube_dl/extractor/allocine.py | 132 + youtube_dl/extractor/alphaporno.py | 77 + youtube_dl/extractor/amcnetworks.py | 118 + youtube_dl/extractor/americastestkitchen.py | 92 + youtube_dl/extractor/amp.py | 102 + youtube_dl/extractor/animeondemand.py | 293 + youtube_dl/extractor/anvato.py | 314 + youtube_dl/extractor/aol.py | 133 + youtube_dl/extractor/apa.py | 94 + youtube_dl/extractor/aparat.py | 95 + youtube_dl/extractor/appleconnect.py | 50 + youtube_dl/extractor/appletrailers.py | 283 + youtube_dl/extractor/archiveorg.py | 65 + youtube_dl/extractor/ard.py | 400 ++ youtube_dl/extractor/arkena.py | 133 + youtube_dl/extractor/arte.py | 201 + youtube_dl/extractor/asiancrush.py | 113 + youtube_dl/extractor/atresplayer.py | 202 + youtube_dl/extractor/atttechchannel.py | 55 + youtube_dl/extractor/atvat.py | 75 + youtube_dl/extractor/audimedia.py | 93 + 
youtube_dl/extractor/audioboom.py | 69 + youtube_dl/extractor/audiomack.py | 145 + youtube_dl/extractor/awaan.py | 185 + youtube_dl/extractor/aws.py | 78 + youtube_dl/extractor/azmedien.py | 86 + youtube_dl/extractor/baidu.py | 56 + youtube_dl/extractor/bambuser.py | 142 + youtube_dl/extractor/bandcamp.py | 417 ++ youtube_dl/extractor/bbc.py | 1344 ++++ youtube_dl/extractor/beampro.py | 191 + youtube_dl/extractor/beatport.py | 103 + youtube_dl/extractor/beeg.py | 109 + youtube_dl/extractor/behindkink.py | 46 + youtube_dl/extractor/bellmedia.py | 83 + youtube_dl/extractor/bet.py | 80 + youtube_dl/extractor/bfi.py | 37 + youtube_dl/extractor/bigflix.py | 78 + youtube_dl/extractor/bild.py | 40 + youtube_dl/extractor/bilibili.py | 308 + youtube_dl/extractor/biobiochiletv.py | 81 + youtube_dl/extractor/biqle.py | 97 + youtube_dl/extractor/bitchute.py | 135 + youtube_dl/extractor/bleacherreport.py | 106 + youtube_dl/extractor/blinkx.py | 86 + youtube_dl/extractor/bloomberg.py | 83 + youtube_dl/extractor/bokecc.py | 60 + youtube_dl/extractor/bostonglobe.py | 72 + youtube_dl/extractor/bpb.py | 62 + youtube_dl/extractor/br.py | 311 + youtube_dl/extractor/bravotv.py | 84 + youtube_dl/extractor/breakcom.py | 91 + youtube_dl/extractor/brightcove.py | 797 +++ youtube_dl/extractor/businessinsider.py | 42 + youtube_dl/extractor/buzzfeed.py | 98 + youtube_dl/extractor/byutv.py | 92 + youtube_dl/extractor/c56.py | 65 + youtube_dl/extractor/camdemy.py | 161 + youtube_dl/extractor/cammodels.py | 98 + youtube_dl/extractor/camtube.py | 71 + youtube_dl/extractor/camwithher.py | 89 + youtube_dl/extractor/canalc2.py | 73 + youtube_dl/extractor/canalplus.py | 116 + youtube_dl/extractor/canvas.py | 319 + youtube_dl/extractor/carambatv.py | 108 + youtube_dl/extractor/cartoonnetwork.py | 62 + youtube_dl/extractor/cbc.py | 457 ++ youtube_dl/extractor/cbs.py | 112 + youtube_dl/extractor/cbsinteractive.py | 103 + youtube_dl/extractor/cbslocal.py | 104 + youtube_dl/extractor/cbsnews.py | 147 + youtube_dl/extractor/cbssports.py | 38 + youtube_dl/extractor/ccc.py | 111 + youtube_dl/extractor/ccma.py | 109 + youtube_dl/extractor/cctv.py | 191 + youtube_dl/extractor/cda.py | 182 + youtube_dl/extractor/ceskatelevize.py | 287 + youtube_dl/extractor/channel9.py | 262 + youtube_dl/extractor/charlierose.py | 54 + youtube_dl/extractor/chaturbate.py | 77 + youtube_dl/extractor/chilloutzone.py | 96 + youtube_dl/extractor/chirbit.py | 91 + youtube_dl/extractor/cinchcast.py | 58 + youtube_dl/extractor/cinemax.py | 29 + youtube_dl/extractor/ciscolive.py | 151 + youtube_dl/extractor/cjsw.py | 72 + youtube_dl/extractor/cliphunter.py | 79 + youtube_dl/extractor/clippit.py | 74 + youtube_dl/extractor/cliprs.py | 33 + youtube_dl/extractor/clipsyndicate.py | 54 + youtube_dl/extractor/closertotruth.py | 92 + youtube_dl/extractor/cloudflarestream.py | 63 + youtube_dl/extractor/cloudy.py | 60 + youtube_dl/extractor/clubic.py | 56 + youtube_dl/extractor/clyp.py | 82 + youtube_dl/extractor/cmt.py | 54 + youtube_dl/extractor/cnbc.py | 66 + youtube_dl/extractor/cnn.py | 144 + youtube_dl/extractor/comcarcoff.py | 74 + youtube_dl/extractor/comedycentral.py | 142 + youtube_dl/extractor/common.py | 2976 +++++++++ youtube_dl/extractor/commonmistakes.py | 50 + youtube_dl/extractor/commonprotocols.py | 60 + youtube_dl/extractor/condenast.py | 232 + youtube_dl/extractor/corus.py | 105 + youtube_dl/extractor/coub.py | 140 + youtube_dl/extractor/cracked.py | 90 + youtube_dl/extractor/crackle.py | 200 + youtube_dl/extractor/crooksandliars.py | 60 + 
youtube_dl/extractor/crunchyroll.py | 680 ++ youtube_dl/extractor/cspan.py | 196 + youtube_dl/extractor/ctsnews.py | 87 + youtube_dl/extractor/ctvnews.py | 68 + youtube_dl/extractor/cultureunplugged.py | 70 + youtube_dl/extractor/curiositystream.py | 161 + youtube_dl/extractor/cwtv.py | 97 + youtube_dl/extractor/dailymail.py | 84 + youtube_dl/extractor/dailymotion.py | 499 ++ youtube_dl/extractor/daisuki.py | 154 + youtube_dl/extractor/daum.py | 308 + youtube_dl/extractor/dbtv.py | 56 + youtube_dl/extractor/dctp.py | 115 + youtube_dl/extractor/deezer.py | 91 + youtube_dl/extractor/defense.py | 39 + youtube_dl/extractor/democracynow.py | 96 + youtube_dl/extractor/dfb.py | 57 + youtube_dl/extractor/dhm.py | 59 + youtube_dl/extractor/digg.py | 56 + youtube_dl/extractor/digiteka.py | 112 + youtube_dl/extractor/discovery.py | 116 + youtube_dl/extractor/discoverygo.py | 175 + youtube_dl/extractor/discoverynetworks.py | 65 + youtube_dl/extractor/discoveryvr.py | 59 + youtube_dl/extractor/disney.py | 170 + youtube_dl/extractor/dispeak.py | 125 + youtube_dl/extractor/dotsub.py | 83 + youtube_dl/extractor/douyutv.py | 201 + youtube_dl/extractor/dplay.py | 376 ++ youtube_dl/extractor/drbonanza.py | 59 + youtube_dl/extractor/dreisat.py | 193 + youtube_dl/extractor/dropbox.py | 40 + youtube_dl/extractor/drtuber.py | 112 + youtube_dl/extractor/drtv.py | 305 + youtube_dl/extractor/dtube.py | 83 + youtube_dl/extractor/dumpert.py | 69 + youtube_dl/extractor/dvtv.py | 184 + youtube_dl/extractor/dw.py | 108 + youtube_dl/extractor/eagleplatform.py | 206 + youtube_dl/extractor/ebaumsworld.py | 33 + youtube_dl/extractor/echomsk.py | 46 + youtube_dl/extractor/egghead.py | 129 + youtube_dl/extractor/ehow.py | 38 + youtube_dl/extractor/eighttracks.py | 164 + youtube_dl/extractor/einthusan.py | 102 + youtube_dl/extractor/eitb.py | 88 + youtube_dl/extractor/ellentube.py | 133 + youtube_dl/extractor/elpais.py | 95 + youtube_dl/extractor/embedly.py | 16 + youtube_dl/extractor/engadget.py | 27 + youtube_dl/extractor/eporner.py | 130 + youtube_dl/extractor/eroprofile.py | 95 + youtube_dl/extractor/escapist.py | 111 + youtube_dl/extractor/espn.py | 242 + youtube_dl/extractor/esri.py | 74 + youtube_dl/extractor/europa.py | 93 + youtube_dl/extractor/everyonesmixtape.py | 77 + youtube_dl/extractor/expotv.py | 77 + youtube_dl/extractor/expressen.py | 98 + youtube_dl/extractor/extractors.py | 1519 +++++ youtube_dl/extractor/extremetube.py | 50 + youtube_dl/extractor/eyedotv.py | 64 + youtube_dl/extractor/facebook.py | 501 ++ youtube_dl/extractor/faz.py | 93 + youtube_dl/extractor/fc2.py | 160 + youtube_dl/extractor/fczenit.py | 56 + youtube_dl/extractor/filmon.py | 178 + youtube_dl/extractor/filmweb.py | 42 + youtube_dl/extractor/firsttv.py | 156 + youtube_dl/extractor/fivemin.py | 54 + youtube_dl/extractor/fivetv.py | 89 + youtube_dl/extractor/flickr.py | 116 + youtube_dl/extractor/flipagram.py | 115 + youtube_dl/extractor/folketinget.py | 77 + youtube_dl/extractor/footyroom.py | 56 + youtube_dl/extractor/formula1.py | 33 + youtube_dl/extractor/fourtube.py | 309 + youtube_dl/extractor/fox.py | 150 + youtube_dl/extractor/fox9.py | 42 + youtube_dl/extractor/foxgay.py | 63 + youtube_dl/extractor/foxnews.py | 127 + youtube_dl/extractor/foxsports.py | 33 + youtube_dl/extractor/franceculture.py | 63 + youtube_dl/extractor/franceinter.py | 56 + youtube_dl/extractor/francetv.py | 516 ++ youtube_dl/extractor/freesound.py | 79 + youtube_dl/extractor/freespeech.py | 31 + youtube_dl/extractor/freshlive.py | 83 + 
youtube_dl/extractor/frontendmasters.py | 263 + youtube_dl/extractor/funimation.py | 154 + youtube_dl/extractor/funk.py | 174 + youtube_dl/extractor/funnyordie.py | 162 + youtube_dl/extractor/fusion.py | 84 + youtube_dl/extractor/fxnetworks.py | 77 + youtube_dl/extractor/gaia.py | 130 + youtube_dl/extractor/gameinformer.py | 33 + youtube_dl/extractor/gameone.py | 134 + youtube_dl/extractor/gamespot.py | 139 + youtube_dl/extractor/gamestar.py | 65 + youtube_dl/extractor/gaskrank.py | 101 + youtube_dl/extractor/gazeta.py | 48 + youtube_dl/extractor/gdcvault.py | 188 + youtube_dl/extractor/generic.py | 3410 ++++++++++ youtube_dl/extractor/gfycat.py | 116 + youtube_dl/extractor/giantbomb.py | 87 + youtube_dl/extractor/giga.py | 102 + youtube_dl/extractor/gigya.py | 22 + youtube_dl/extractor/glide.py | 43 + youtube_dl/extractor/globo.py | 224 + youtube_dl/extractor/go.py | 227 + youtube_dl/extractor/go90.py | 149 + youtube_dl/extractor/godtube.py | 58 + youtube_dl/extractor/golem.py | 72 + youtube_dl/extractor/googledrive.py | 277 + youtube_dl/extractor/googleplus.py | 73 + youtube_dl/extractor/googlesearch.py | 59 + youtube_dl/extractor/goshgay.py | 51 + youtube_dl/extractor/gputechconf.py | 35 + youtube_dl/extractor/groupon.py | 67 + youtube_dl/extractor/hark.py | 33 + youtube_dl/extractor/hbo.py | 175 + youtube_dl/extractor/hearthisat.py | 135 + youtube_dl/extractor/heise.py | 163 + youtube_dl/extractor/hellporno.py | 75 + youtube_dl/extractor/helsinki.py | 43 + youtube_dl/extractor/hentaistigma.py | 39 + youtube_dl/extractor/hgtv.py | 40 + youtube_dl/extractor/hidive.py | 118 + youtube_dl/extractor/historicfilms.py | 47 + youtube_dl/extractor/hitbox.py | 214 + youtube_dl/extractor/hitrecord.py | 68 + youtube_dl/extractor/hketv.py | 191 + youtube_dl/extractor/hornbunny.py | 49 + youtube_dl/extractor/hotnewhiphop.py | 66 + youtube_dl/extractor/hotstar.py | 201 + youtube_dl/extractor/howcast.py | 43 + youtube_dl/extractor/howstuffworks.py | 90 + youtube_dl/extractor/hrti.py | 208 + youtube_dl/extractor/huajiao.py | 56 + youtube_dl/extractor/huffpost.py | 96 + youtube_dl/extractor/hungama.py | 117 + youtube_dl/extractor/hypem.py | 49 + youtube_dl/extractor/iconosquare.py | 85 + youtube_dl/extractor/ign.py | 232 + youtube_dl/extractor/imdb.py | 121 + youtube_dl/extractor/imgur.py | 154 + youtube_dl/extractor/ina.py | 83 + youtube_dl/extractor/inc.py | 59 + youtube_dl/extractor/indavideo.py | 128 + youtube_dl/extractor/infoq.py | 136 + youtube_dl/extractor/instagram.py | 425 ++ youtube_dl/extractor/internazionale.py | 85 + youtube_dl/extractor/internetvideoarchive.py | 100 + youtube_dl/extractor/iprima.py | 132 + youtube_dl/extractor/iqiyi.py | 394 ++ youtube_dl/extractor/ir90tv.py | 42 + youtube_dl/extractor/itv.py | 312 + youtube_dl/extractor/ivi.py | 220 + youtube_dl/extractor/ivideon.py | 83 + youtube_dl/extractor/iwara.py | 99 + youtube_dl/extractor/izlesene.py | 117 + youtube_dl/extractor/jamendo.py | 150 + youtube_dl/extractor/jeuxvideo.py | 56 + youtube_dl/extractor/joj.py | 108 + youtube_dl/extractor/jove.py | 80 + youtube_dl/extractor/jpopsukitv.py | 68 + youtube_dl/extractor/jwplatform.py | 41 + youtube_dl/extractor/kakao.py | 149 + youtube_dl/extractor/kaltura.py | 359 ++ youtube_dl/extractor/kanalplay.py | 97 + youtube_dl/extractor/kankan.py | 48 + youtube_dl/extractor/karaoketv.py | 64 + youtube_dl/extractor/karrierevideos.py | 99 + youtube_dl/extractor/keek.py | 39 + youtube_dl/extractor/keezmovies.py | 133 + youtube_dl/extractor/ketnet.py | 93 + youtube_dl/extractor/khanacademy.py 
| 82 + youtube_dl/extractor/kickstarter.py | 71 + youtube_dl/extractor/kinopoisk.py | 70 + youtube_dl/extractor/konserthusetplay.py | 124 + youtube_dl/extractor/kontrtube.py | 73 + youtube_dl/extractor/krasview.py | 60 + youtube_dl/extractor/ku6.py | 32 + youtube_dl/extractor/kusi.py | 88 + youtube_dl/extractor/kuwo.py | 352 ++ youtube_dl/extractor/la7.py | 67 + youtube_dl/extractor/laola1tv.py | 265 + youtube_dl/extractor/lci.py | 26 + youtube_dl/extractor/lcp.py | 90 + youtube_dl/extractor/learnr.py | 33 + youtube_dl/extractor/lecture2go.py | 71 + youtube_dl/extractor/lecturio.py | 244 + youtube_dl/extractor/leeco.py | 368 ++ youtube_dl/extractor/lego.py | 128 + youtube_dl/extractor/lemonde.py | 58 + youtube_dl/extractor/lenta.py | 53 + youtube_dl/extractor/libraryofcongress.py | 153 + youtube_dl/extractor/libsyn.py | 93 + youtube_dl/extractor/lifenews.py | 239 + youtube_dl/extractor/limelight.py | 377 ++ youtube_dl/extractor/line.py | 90 + youtube_dl/extractor/linkedin.py | 182 + youtube_dl/extractor/linuxacademy.py | 174 + youtube_dl/extractor/litv.py | 148 + youtube_dl/extractor/liveleak.py | 191 + youtube_dl/extractor/livestream.py | 366 ++ youtube_dl/extractor/lnkgo.py | 116 + youtube_dl/extractor/localnews8.py | 47 + youtube_dl/extractor/lovehomeporn.py | 37 + youtube_dl/extractor/lrt.py | 94 + youtube_dl/extractor/lynda.py | 332 + youtube_dl/extractor/m6.py | 25 + youtube_dl/extractor/macgamestore.py | 42 + youtube_dl/extractor/mailru.py | 314 + youtube_dl/extractor/makertv.py | 32 + youtube_dl/extractor/malltv.py | 53 + youtube_dl/extractor/mangomolo.py | 53 + youtube_dl/extractor/manyvids.py | 92 + youtube_dl/extractor/markiza.py | 125 + youtube_dl/extractor/massengeschmacktv.py | 77 + youtube_dl/extractor/matchtv.py | 55 + youtube_dl/extractor/mdr.py | 184 + youtube_dl/extractor/medialaan.py | 269 + youtube_dl/extractor/mediaset.py | 163 + youtube_dl/extractor/mediasite.py | 366 ++ youtube_dl/extractor/medici.py | 70 + youtube_dl/extractor/megaphone.py | 55 + youtube_dl/extractor/meipai.py | 104 + youtube_dl/extractor/melonvod.py | 72 + youtube_dl/extractor/meta.py | 73 + youtube_dl/extractor/metacafe.py | 287 + youtube_dl/extractor/metacritic.py | 65 + youtube_dl/extractor/mgoon.py | 87 + youtube_dl/extractor/mgtv.py | 92 + youtube_dl/extractor/miaopai.py | 40 + .../extractor/microsoftvirtualacademy.py | 195 + youtube_dl/extractor/minhateca.py | 70 + youtube_dl/extractor/ministrygrid.py | 57 + youtube_dl/extractor/minoto.py | 51 + youtube_dl/extractor/miomio.py | 141 + youtube_dl/extractor/mit.py | 156 + youtube_dl/extractor/mitele.py | 120 + youtube_dl/extractor/mixcloud.py | 398 ++ youtube_dl/extractor/mlb.py | 120 + youtube_dl/extractor/mnet.py | 89 + youtube_dl/extractor/moevideo.py | 79 + youtube_dl/extractor/mofosex.py | 56 + youtube_dl/extractor/mojvideo.py | 58 + youtube_dl/extractor/morningstar.py | 50 + youtube_dl/extractor/motherless.py | 205 + youtube_dl/extractor/motorsport.py | 49 + youtube_dl/extractor/movieclips.py | 49 + youtube_dl/extractor/moviezine.py | 45 + youtube_dl/extractor/movingimage.py | 52 + youtube_dl/extractor/msn.py | 115 + youtube_dl/extractor/mtv.py | 490 ++ youtube_dl/extractor/muenchentv.py | 75 + youtube_dl/extractor/musicplayon.py | 66 + youtube_dl/extractor/mwave.py | 90 + youtube_dl/extractor/mychannels.py | 40 + youtube_dl/extractor/myspace.py | 212 + youtube_dl/extractor/myspass.py | 73 + youtube_dl/extractor/myvi.py | 111 + youtube_dl/extractor/myvidster.py | 29 + youtube_dl/extractor/nationalgeographic.py | 82 + 
youtube_dl/extractor/naver.py | 128 + youtube_dl/extractor/nba.py | 154 + youtube_dl/extractor/nbc.py | 469 ++ youtube_dl/extractor/ndr.py | 389 ++ youtube_dl/extractor/ndtv.py | 115 + youtube_dl/extractor/nerdcubed.py | 36 + youtube_dl/extractor/neteasemusic.py | 485 ++ youtube_dl/extractor/netzkino.py | 89 + youtube_dl/extractor/newgrounds.py | 168 + youtube_dl/extractor/newstube.py | 83 + youtube_dl/extractor/nextmedia.py | 238 + youtube_dl/extractor/nexx.py | 440 ++ youtube_dl/extractor/nfl.py | 231 + youtube_dl/extractor/nhk.py | 81 + youtube_dl/extractor/nhl.py | 128 + youtube_dl/extractor/nick.py | 245 + youtube_dl/extractor/niconico.py | 470 ++ youtube_dl/extractor/ninecninemedia.py | 102 + youtube_dl/extractor/ninegag.py | 104 + youtube_dl/extractor/ninenow.py | 89 + youtube_dl/extractor/nintendo.py | 46 + youtube_dl/extractor/njpwworld.py | 98 + youtube_dl/extractor/nobelprize.py | 62 + youtube_dl/extractor/noco.py | 235 + youtube_dl/extractor/nonktube.py | 33 + youtube_dl/extractor/noovo.py | 104 + youtube_dl/extractor/normalboots.py | 54 + youtube_dl/extractor/nosvideo.py | 75 + youtube_dl/extractor/nova.py | 256 + youtube_dl/extractor/nowness.py | 147 + youtube_dl/extractor/noz.py | 89 + youtube_dl/extractor/npo.py | 767 +++ youtube_dl/extractor/npr.py | 108 + youtube_dl/extractor/nrk.py | 677 ++ youtube_dl/extractor/nrl.py | 30 + youtube_dl/extractor/ntvcojp.py | 49 + youtube_dl/extractor/ntvde.py | 77 + youtube_dl/extractor/ntvru.py | 132 + youtube_dl/extractor/nuevo.py | 39 + youtube_dl/extractor/nuvid.py | 71 + youtube_dl/extractor/nytimes.py | 223 + youtube_dl/extractor/nzz.py | 43 + youtube_dl/extractor/odatv.py | 50 + youtube_dl/extractor/odnoklassniki.py | 259 + youtube_dl/extractor/oktoberfesttv.py | 47 + youtube_dl/extractor/once.py | 43 + youtube_dl/extractor/ondemandkorea.py | 62 + youtube_dl/extractor/onet.py | 250 + youtube_dl/extractor/onionstudios.py | 81 + youtube_dl/extractor/ooyala.py | 207 + youtube_dl/extractor/openload.py | 483 ++ youtube_dl/extractor/ora.py | 75 + youtube_dl/extractor/orf.py | 426 ++ youtube_dl/extractor/outsidetv.py | 28 + youtube_dl/extractor/packtpub.py | 170 + youtube_dl/extractor/pandatv.py | 99 + youtube_dl/extractor/pandoratv.py | 134 + youtube_dl/extractor/parliamentliveuk.py | 43 + youtube_dl/extractor/patreon.py | 136 + youtube_dl/extractor/pbs.py | 710 +++ youtube_dl/extractor/pearvideo.py | 63 + youtube_dl/extractor/peertube.py | 250 + youtube_dl/extractor/people.py | 32 + youtube_dl/extractor/performgroup.py | 83 + youtube_dl/extractor/periscope.py | 171 + youtube_dl/extractor/philharmoniedeparis.py | 100 + youtube_dl/extractor/phoenix.py | 45 + youtube_dl/extractor/photobucket.py | 46 + youtube_dl/extractor/picarto.py | 153 + youtube_dl/extractor/piksel.py | 123 + youtube_dl/extractor/pinkbike.py | 97 + youtube_dl/extractor/pladform.py | 125 + youtube_dl/extractor/platzi.py | 217 + youtube_dl/extractor/playfm.py | 75 + youtube_dl/extractor/playplustv.py | 109 + youtube_dl/extractor/plays.py | 53 + youtube_dl/extractor/playtvak.py | 191 + youtube_dl/extractor/playvid.py | 99 + youtube_dl/extractor/playwire.py | 75 + youtube_dl/extractor/pluralsight.py | 501 ++ youtube_dl/extractor/podomatic.py | 76 + youtube_dl/extractor/pokemon.py | 75 + youtube_dl/extractor/polskieradio.py | 180 + youtube_dl/extractor/popcorntv.py | 76 + youtube_dl/extractor/porn91.py | 58 + youtube_dl/extractor/porncom.py | 103 + youtube_dl/extractor/pornhd.py | 109 + youtube_dl/extractor/pornhub.py | 582 ++ youtube_dl/extractor/pornotube.py | 85 + 
youtube_dl/extractor/pornovoisines.py | 108 + youtube_dl/extractor/pornoxo.py | 58 + youtube_dl/extractor/presstv.py | 74 + youtube_dl/extractor/promptfile.py | 70 + youtube_dl/extractor/prosiebensat1.py | 500 ++ youtube_dl/extractor/puhutv.py | 247 + youtube_dl/extractor/puls4.py | 57 + youtube_dl/extractor/pyvideo.py | 72 + youtube_dl/extractor/qqmusic.py | 369 ++ youtube_dl/extractor/r7.py | 112 + youtube_dl/extractor/radiobremen.py | 63 + youtube_dl/extractor/radiocanada.py | 171 + youtube_dl/extractor/radiode.py | 52 + youtube_dl/extractor/radiofrance.py | 59 + youtube_dl/extractor/radiojavan.py | 83 + youtube_dl/extractor/rai.py | 502 ++ youtube_dl/extractor/raywenderlich.py | 179 + youtube_dl/extractor/rbmaradio.py | 72 + youtube_dl/extractor/rds.py | 70 + youtube_dl/extractor/redbulltv.py | 128 + youtube_dl/extractor/reddit.py | 130 + youtube_dl/extractor/redtube.py | 115 + youtube_dl/extractor/regiotv.py | 62 + youtube_dl/extractor/rentv.py | 106 + youtube_dl/extractor/restudy.py | 44 + youtube_dl/extractor/reuters.py | 69 + youtube_dl/extractor/reverbnation.py | 53 + youtube_dl/extractor/revision3.py | 170 + youtube_dl/extractor/rice.py | 116 + youtube_dl/extractor/rmcdecouverte.py | 55 + youtube_dl/extractor/ro220.py | 43 + youtube_dl/extractor/rockstargames.py | 69 + youtube_dl/extractor/roosterteeth.py | 148 + youtube_dl/extractor/rottentomatoes.py | 32 + youtube_dl/extractor/roxwel.py | 53 + youtube_dl/extractor/rozhlas.py | 50 + youtube_dl/extractor/rtbf.py | 161 + youtube_dl/extractor/rte.py | 167 + youtube_dl/extractor/rtl2.py | 207 + youtube_dl/extractor/rtlnl.py | 126 + youtube_dl/extractor/rtp.py | 66 + youtube_dl/extractor/rts.py | 230 + youtube_dl/extractor/rtve.py | 292 + youtube_dl/extractor/rtvnh.py | 62 + youtube_dl/extractor/rtvs.py | 47 + youtube_dl/extractor/rudo.py | 53 + youtube_dl/extractor/ruhd.py | 45 + youtube_dl/extractor/rutube.py | 313 + youtube_dl/extractor/rutv.py | 211 + youtube_dl/extractor/ruutu.py | 153 + youtube_dl/extractor/ruv.py | 101 + youtube_dl/extractor/safari.py | 263 + youtube_dl/extractor/sapo.py | 119 + youtube_dl/extractor/savefrom.py | 34 + youtube_dl/extractor/sbs.py | 66 + youtube_dl/extractor/screencast.py | 123 + youtube_dl/extractor/screencastomatic.py | 37 + youtube_dl/extractor/scrippsnetworks.py | 104 + youtube_dl/extractor/seeker.py | 57 + youtube_dl/extractor/senateisvp.py | 153 + youtube_dl/extractor/sendtonews.py | 105 + youtube_dl/extractor/servingsys.py | 72 + youtube_dl/extractor/servus.py | 56 + youtube_dl/extractor/sevenplus.py | 84 + youtube_dl/extractor/sexu.py | 63 + youtube_dl/extractor/seznamzpravy.py | 169 + youtube_dl/extractor/shahid.py | 215 + youtube_dl/extractor/shared.py | 127 + youtube_dl/extractor/showroomlive.py | 84 + youtube_dl/extractor/sina.py | 115 + youtube_dl/extractor/sixplay.py | 129 + youtube_dl/extractor/sky.py | 70 + youtube_dl/extractor/skylinewebcams.py | 42 + youtube_dl/extractor/skynewsarabia.py | 117 + youtube_dl/extractor/slideshare.py | 56 + youtube_dl/extractor/slideslive.py | 39 + youtube_dl/extractor/slutload.py | 65 + youtube_dl/extractor/smotri.py | 416 ++ youtube_dl/extractor/snotr.py | 73 + youtube_dl/extractor/sohu.py | 202 + youtube_dl/extractor/sonyliv.py | 40 + youtube_dl/extractor/soundcloud.py | 795 +++ youtube_dl/extractor/soundgasm.py | 77 + youtube_dl/extractor/southpark.py | 115 + youtube_dl/extractor/spankbang.py | 171 + youtube_dl/extractor/spankwire.py | 127 + youtube_dl/extractor/spiegel.py | 159 + youtube_dl/extractor/spiegeltv.py | 17 + 
youtube_dl/extractor/spike.py | 57 + youtube_dl/extractor/sport5.py | 92 + youtube_dl/extractor/sportbox.py | 99 + youtube_dl/extractor/sportdeutschland.py | 100 + youtube_dl/extractor/springboardplatform.py | 125 + youtube_dl/extractor/sprout.py | 52 + youtube_dl/extractor/srgssr.py | 186 + youtube_dl/extractor/srmediathek.py | 59 + youtube_dl/extractor/stanfordoc.py | 91 + youtube_dl/extractor/steam.py | 149 + youtube_dl/extractor/stitcher.py | 81 + youtube_dl/extractor/streamable.py | 112 + youtube_dl/extractor/streamango.py | 128 + youtube_dl/extractor/streamcloud.py | 78 + youtube_dl/extractor/streamcz.py | 105 + youtube_dl/extractor/streetvoice.py | 49 + youtube_dl/extractor/stretchinternet.py | 48 + youtube_dl/extractor/stv.py | 94 + youtube_dl/extractor/sunporno.py | 79 + youtube_dl/extractor/sverigesradio.py | 115 + youtube_dl/extractor/svt.py | 371 ++ youtube_dl/extractor/swrmediathek.py | 115 + youtube_dl/extractor/syfy.py | 58 + youtube_dl/extractor/sztvhu.py | 41 + youtube_dl/extractor/tagesschau.py | 311 + youtube_dl/extractor/tass.py | 62 + youtube_dl/extractor/tastytrade.py | 43 + youtube_dl/extractor/tbs.py | 89 + youtube_dl/extractor/tdslifeway.py | 33 + youtube_dl/extractor/teachable.py | 259 + youtube_dl/extractor/teachertube.py | 129 + youtube_dl/extractor/teachingchannel.py | 35 + youtube_dl/extractor/teamcoco.py | 199 + youtube_dl/extractor/teamtreehouse.py | 140 + youtube_dl/extractor/techtalks.py | 82 + youtube_dl/extractor/ted.py | 351 ++ youtube_dl/extractor/tele13.py | 88 + youtube_dl/extractor/tele5.py | 57 + youtube_dl/extractor/telebruxelles.py | 76 + youtube_dl/extractor/telecinco.py | 156 + youtube_dl/extractor/telegraaf.py | 78 + youtube_dl/extractor/telemb.py | 78 + youtube_dl/extractor/telequebec.py | 151 + youtube_dl/extractor/teletask.py | 53 + youtube_dl/extractor/telewebion.py | 55 + youtube_dl/extractor/tennistv.py | 112 + youtube_dl/extractor/testurl.py | 64 + youtube_dl/extractor/tf1.py | 92 + youtube_dl/extractor/tfo.py | 57 + youtube_dl/extractor/theintercept.py | 49 + youtube_dl/extractor/theplatform.py | 411 ++ youtube_dl/extractor/thescene.py | 44 + youtube_dl/extractor/thestar.py | 36 + youtube_dl/extractor/thesun.py | 32 + youtube_dl/extractor/theweatherchannel.py | 79 + youtube_dl/extractor/thisamericanlife.py | 40 + youtube_dl/extractor/thisav.py | 73 + youtube_dl/extractor/thisoldhouse.py | 44 + youtube_dl/extractor/thisvid.py | 97 + youtube_dl/extractor/threeqsdn.py | 142 + youtube_dl/extractor/tiktok.py | 138 + youtube_dl/extractor/tinypic.py | 56 + youtube_dl/extractor/tmz.py | 56 + youtube_dl/extractor/tnaflix.py | 327 + youtube_dl/extractor/toggle.py | 210 + youtube_dl/extractor/tonline.py | 59 + youtube_dl/extractor/toongoggles.py | 81 + youtube_dl/extractor/toutv.py | 93 + youtube_dl/extractor/toypics.py | 90 + youtube_dl/extractor/traileraddict.py | 64 + youtube_dl/extractor/trilulilu.py | 103 + youtube_dl/extractor/trunews.py | 75 + youtube_dl/extractor/trutv.py | 75 + youtube_dl/extractor/tube8.py | 86 + youtube_dl/extractor/tubitv.py | 96 + youtube_dl/extractor/tudou.py | 49 + youtube_dl/extractor/tumblr.py | 214 + youtube_dl/extractor/tunein.py | 183 + youtube_dl/extractor/tunepk.py | 90 + youtube_dl/extractor/turbo.py | 68 + youtube_dl/extractor/turner.py | 234 + youtube_dl/extractor/tutv.py | 36 + youtube_dl/extractor/tv2.py | 145 + youtube_dl/extractor/tv2hu.py | 62 + youtube_dl/extractor/tv4.py | 115 + youtube_dl/extractor/tv5mondeplus.py | 79 + youtube_dl/extractor/tva.py | 54 + youtube_dl/extractor/tvanouvelles.py | 65 
+ youtube_dl/extractor/tvc.py | 109 + youtube_dl/extractor/tvigle.py | 119 + youtube_dl/extractor/tvland.py | 37 + youtube_dl/extractor/tvn24.py | 79 + youtube_dl/extractor/tvnet.py | 147 + youtube_dl/extractor/tvnoe.py | 48 + youtube_dl/extractor/tvnow.py | 486 ++ youtube_dl/extractor/tvp.py | 252 + youtube_dl/extractor/tvplay.py | 557 ++ youtube_dl/extractor/tvplayer.py | 86 + youtube_dl/extractor/tweakers.py | 62 + youtube_dl/extractor/twentyfourvideo.py | 127 + youtube_dl/extractor/twentymin.py | 91 + youtube_dl/extractor/twentythreevideo.py | 77 + youtube_dl/extractor/twitcasting.py | 81 + youtube_dl/extractor/twitch.py | 731 +++ youtube_dl/extractor/twitter.py | 559 ++ youtube_dl/extractor/udemy.py | 481 ++ youtube_dl/extractor/udn.py | 102 + youtube_dl/extractor/ufctv.py | 73 + youtube_dl/extractor/uktvplay.py | 33 + youtube_dl/extractor/umg.py | 103 + youtube_dl/extractor/unistra.py | 67 + youtube_dl/extractor/unity.py | 32 + youtube_dl/extractor/uol.py | 159 + youtube_dl/extractor/uplynk.py | 70 + youtube_dl/extractor/urort.py | 66 + youtube_dl/extractor/urplay.py | 71 + youtube_dl/extractor/usanetwork.py | 76 + youtube_dl/extractor/usatoday.py | 63 + youtube_dl/extractor/ustream.py | 281 + youtube_dl/extractor/ustudio.py | 125 + youtube_dl/extractor/varzesh3.py | 79 + youtube_dl/extractor/vbox7.py | 105 + youtube_dl/extractor/veehd.py | 118 + youtube_dl/extractor/veoh.py | 103 + youtube_dl/extractor/vessel.py | 157 + youtube_dl/extractor/vesti.py | 121 + youtube_dl/extractor/vevo.py | 374 ++ youtube_dl/extractor/vgtv.py | 307 + youtube_dl/extractor/vh1.py | 41 + youtube_dl/extractor/vice.py | 337 + youtube_dl/extractor/vidbit.py | 84 + youtube_dl/extractor/viddler.py | 138 + youtube_dl/extractor/videa.py | 106 + youtube_dl/extractor/videodetective.py | 30 + youtube_dl/extractor/videofyme.py | 52 + youtube_dl/extractor/videomore.py | 307 + youtube_dl/extractor/videopremium.py | 46 + youtube_dl/extractor/videopress.py | 96 + youtube_dl/extractor/vidio.py | 77 + youtube_dl/extractor/vidlii.py | 125 + youtube_dl/extractor/vidme.py | 295 + youtube_dl/extractor/vidzi.py | 68 + youtube_dl/extractor/vier.py | 264 + youtube_dl/extractor/viewlift.py | 266 + youtube_dl/extractor/viewster.py | 217 + youtube_dl/extractor/viidea.py | 202 + youtube_dl/extractor/viki.py | 384 ++ youtube_dl/extractor/vimeo.py | 1182 ++++ youtube_dl/extractor/vimple.py | 61 + youtube_dl/extractor/vine.py | 154 + youtube_dl/extractor/viqeo.py | 99 + youtube_dl/extractor/viu.py | 272 + youtube_dl/extractor/vk.py | 635 ++ youtube_dl/extractor/vlive.py | 405 ++ youtube_dl/extractor/vodlocker.py | 80 + youtube_dl/extractor/vodpl.py | 32 + youtube_dl/extractor/vodplatform.py | 37 + youtube_dl/extractor/voicerepublic.py | 100 + youtube_dl/extractor/voot.py | 100 + youtube_dl/extractor/voxmedia.py | 182 + youtube_dl/extractor/vrak.py | 80 + youtube_dl/extractor/vrt.py | 87 + youtube_dl/extractor/vrv.py | 269 + youtube_dl/extractor/vshare.py | 74 + youtube_dl/extractor/vube.py | 172 + youtube_dl/extractor/vuclip.py | 70 + youtube_dl/extractor/vvvvid.py | 158 + youtube_dl/extractor/vyborymos.py | 55 + youtube_dl/extractor/vzaar.py | 95 + youtube_dl/extractor/wakanim.py | 66 + youtube_dl/extractor/walla.py | 86 + youtube_dl/extractor/washingtonpost.py | 183 + youtube_dl/extractor/wat.py | 157 + youtube_dl/extractor/watchbox.py | 161 + youtube_dl/extractor/watchindianporn.py | 68 + youtube_dl/extractor/wdr.py | 330 + youtube_dl/extractor/webcaster.py | 102 + youtube_dl/extractor/webofstories.py | 160 + 
youtube_dl/extractor/weibo.py | 140 + youtube_dl/extractor/weiqitv.py | 52 + youtube_dl/extractor/wimp.py | 54 + youtube_dl/extractor/wistia.py | 126 + youtube_dl/extractor/worldstarhiphop.py | 40 + youtube_dl/extractor/wsj.py | 123 + youtube_dl/extractor/wwe.py | 140 + youtube_dl/extractor/xbef.py | 44 + youtube_dl/extractor/xboxclips.py | 53 + youtube_dl/extractor/xfileshare.py | 213 + youtube_dl/extractor/xhamster.py | 324 + youtube_dl/extractor/xiami.py | 201 + youtube_dl/extractor/ximalaya.py | 233 + youtube_dl/extractor/xminus.py | 79 + youtube_dl/extractor/xnxx.py | 84 + youtube_dl/extractor/xstream.py | 119 + youtube_dl/extractor/xtube.py | 180 + youtube_dl/extractor/xuite.py | 153 + youtube_dl/extractor/xvideos.py | 110 + youtube_dl/extractor/xxxymovies.py | 81 + youtube_dl/extractor/yahoo.py | 558 ++ youtube_dl/extractor/yandexdisk.py | 118 + youtube_dl/extractor/yandexmusic.py | 265 + youtube_dl/extractor/yandexvideo.py | 90 + youtube_dl/extractor/yapfiles.py | 101 + youtube_dl/extractor/yesjapan.py | 62 + youtube_dl/extractor/yinyuetai.py | 56 + youtube_dl/extractor/ynet.py | 52 + youtube_dl/extractor/youjizz.py | 95 + youtube_dl/extractor/youku.py | 309 + youtube_dl/extractor/younow.py | 202 + youtube_dl/extractor/youporn.py | 192 + youtube_dl/extractor/yourporn.py | 57 + youtube_dl/extractor/yourupload.py | 46 + youtube_dl/extractor/youtube.py | 3201 ++++++++++ youtube_dl/extractor/zapiks.py | 110 + youtube_dl/extractor/zaq1.py | 101 + youtube_dl/extractor/zattoo.py | 433 ++ youtube_dl/extractor/zdf.py | 319 + youtube_dl/extractor/zingmp3.py | 143 + youtube_dl/extractor/zype.py | 57 + youtube_dl/jsinterp.py | 262 + youtube_dl/options.py | 916 +++ youtube_dl/postprocessor/__init__.py | 40 + youtube_dl/postprocessor/common.py | 69 + youtube_dl/postprocessor/embedthumbnail.py | 93 + youtube_dl/postprocessor/execafterdownload.py | 31 + youtube_dl/postprocessor/ffmpeg.py | 646 ++ youtube_dl/postprocessor/metadatafromtitle.py | 48 + youtube_dl/postprocessor/xattrpp.py | 79 + youtube_dl/socks.py | 273 + youtube_dl/swfinterp.py | 834 +++ youtube_dl/update.py | 187 + youtube_dl/utils.py | 5593 +++++++++++++++++ youtube_dl/version.py | 3 + 945 files changed, 155520 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/1_broken_site.md create mode 100644 .github/ISSUE_TEMPLATE/2_site_support_request.md create mode 100644 .github/ISSUE_TEMPLATE/3_site_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/4_bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/5_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE/6_question.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md create mode 100644 .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 AUTHORS create mode 100644 CONTRIBUTING.md create mode 100644 ChangeLog create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 README.md create mode 100755 bin/youtube-dl create mode 100644 devscripts/SizeOfImage.patch create mode 100644 devscripts/SizeOfImage_w.patch create mode 100644 devscripts/bash-completion.in create mode 100755 devscripts/bash-completion.py create mode 100644 devscripts/buildserver.py create mode 100644 
devscripts/check-porn.py create mode 100644 devscripts/create-github-release.py create mode 100644 devscripts/fish-completion.in create mode 100755 devscripts/fish-completion.py create mode 100644 devscripts/generate_aes_testdata.py create mode 100755 devscripts/gh-pages/add-version.py create mode 100755 devscripts/gh-pages/generate-download.py create mode 100755 devscripts/gh-pages/sign-versions.py create mode 100755 devscripts/gh-pages/update-copyright.py create mode 100755 devscripts/gh-pages/update-feed.py create mode 100755 devscripts/gh-pages/update-sites.py create mode 100755 devscripts/install_jython.sh create mode 100644 devscripts/lazy_load_template.py create mode 100755 devscripts/make_contributing.py create mode 100644 devscripts/make_issue_template.py create mode 100644 devscripts/make_lazy_extractors.py create mode 100755 devscripts/make_readme.py create mode 100644 devscripts/make_supportedsites.py create mode 100755 devscripts/posix-locale.sh create mode 100644 devscripts/prepare_manpage.py create mode 100755 devscripts/release.sh create mode 100755 devscripts/run_tests.sh create mode 100644 devscripts/show-downloads-statistics.py create mode 100755 devscripts/wine-py2exe.sh create mode 100644 devscripts/zsh-completion.in create mode 100755 devscripts/zsh-completion.py create mode 100644 docs/.gitignore create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/module_guide.rst create mode 100644 docs/supportedsites.md create mode 100644 setup.cfg create mode 100644 setup.py create mode 100644 test/__init__.py create mode 100644 test/helper.py create mode 100644 test/parameters.json create mode 100644 test/swftests/.gitignore create mode 100644 test/swftests/ArrayAccess.as create mode 100644 test/swftests/ClassCall.as create mode 100644 test/swftests/ClassConstruction.as create mode 100644 test/swftests/ConstArrayAccess.as create mode 100644 test/swftests/ConstantInt.as create mode 100644 test/swftests/DictCall.as create mode 100644 test/swftests/EqualsOperator.as create mode 100644 test/swftests/LocalVars.as create mode 100644 test/swftests/MemberAssignment.as create mode 100644 test/swftests/NeOperator.as create mode 100644 test/swftests/PrivateCall.as create mode 100644 test/swftests/PrivateVoidCall.as create mode 100644 test/swftests/StaticAssignment.as create mode 100644 test/swftests/StaticRetrieval.as create mode 100644 test/swftests/StringBasics.as create mode 100644 test/swftests/StringCharCodeAt.as create mode 100644 test/swftests/StringConversion.as create mode 100644 test/test_InfoExtractor.py create mode 100644 test/test_YoutubeDL.py create mode 100644 test/test_YoutubeDLCookieJar.py create mode 100644 test/test_aes.py create mode 100644 test/test_age_restriction.py create mode 100644 test/test_all_urls.py create mode 100644 test/test_cache.py create mode 100644 test/test_compat.py create mode 100644 test/test_download.py create mode 100644 test/test_downloader_http.py create mode 100644 test/test_execution.py create mode 100644 test/test_http.py create mode 100644 test/test_iqiyi_sdk_interpreter.py create mode 100644 test/test_jsinterp.py create mode 100644 test/test_netrc.py create mode 100644 test/test_options.py create mode 100644 test/test_postprocessors.py create mode 100644 test/test_socks.py create mode 100644 test/test_subtitles.py create mode 100644 test/test_swfinterp.py create mode 100644 test/test_unicode_literals.py create mode 100644 test/test_update.py create mode 100644 
test/test_utils.py create mode 100644 test/test_verbose_output.py create mode 100644 test/test_write_annotations.py create mode 100644 test/test_youtube_chapters.py create mode 100644 test/test_youtube_lists.py create mode 100644 test/test_youtube_signature.py create mode 100644 test/testcert.pem create mode 100644 test/testdata/cookies/httponly_cookies.txt create mode 100644 test/testdata/cookies/session_cookies.txt create mode 100644 test/testdata/f4m/custom_base_url.f4m create mode 100644 test/testdata/m3u8/pluzz_francetv_11507.m3u8 create mode 100644 test/testdata/m3u8/teamcoco_11995.m3u8 create mode 100644 test/testdata/m3u8/ted_18923.m3u8 create mode 100644 test/testdata/m3u8/toggle_mobile_12211.m3u8 create mode 100644 test/testdata/m3u8/twitch_vod.m3u8 create mode 100644 test/testdata/m3u8/vidio.m3u8 create mode 100644 test/testdata/mpd/float_duration.mpd create mode 100644 test/testdata/mpd/unfragmented.mpd create mode 100644 test/testdata/mpd/urls_only.mpd create mode 100644 test/testdata/xspf/foo_xspf.xspf create mode 100644 test/versions.json create mode 100644 tox.ini create mode 100644 youtube-dl.plugin.zsh create mode 100755 youtube_dl/YoutubeDL.py create mode 100644 youtube_dl/__init__.py create mode 100755 youtube_dl/__main__.py create mode 100644 youtube_dl/aes.py create mode 100644 youtube_dl/cache.py create mode 100644 youtube_dl/compat.py create mode 100644 youtube_dl/downloader/__init__.py create mode 100644 youtube_dl/downloader/common.py create mode 100644 youtube_dl/downloader/dash.py create mode 100644 youtube_dl/downloader/external.py create mode 100644 youtube_dl/downloader/f4m.py create mode 100644 youtube_dl/downloader/fragment.py create mode 100644 youtube_dl/downloader/hls.py create mode 100644 youtube_dl/downloader/http.py create mode 100644 youtube_dl/downloader/ism.py create mode 100644 youtube_dl/downloader/rtmp.py create mode 100644 youtube_dl/downloader/rtsp.py create mode 100644 youtube_dl/extractor/__init__.py create mode 100644 youtube_dl/extractor/abc.py create mode 100644 youtube_dl/extractor/abcnews.py create mode 100644 youtube_dl/extractor/abcotvs.py create mode 100644 youtube_dl/extractor/academicearth.py create mode 100644 youtube_dl/extractor/acast.py create mode 100644 youtube_dl/extractor/addanime.py create mode 100644 youtube_dl/extractor/adn.py create mode 100644 youtube_dl/extractor/adobeconnect.py create mode 100644 youtube_dl/extractor/adobepass.py create mode 100644 youtube_dl/extractor/adobetv.py create mode 100644 youtube_dl/extractor/adultswim.py create mode 100644 youtube_dl/extractor/aenetworks.py create mode 100644 youtube_dl/extractor/afreecatv.py create mode 100644 youtube_dl/extractor/airmozilla.py create mode 100644 youtube_dl/extractor/aliexpress.py create mode 100644 youtube_dl/extractor/aljazeera.py create mode 100644 youtube_dl/extractor/allocine.py create mode 100644 youtube_dl/extractor/alphaporno.py create mode 100644 youtube_dl/extractor/amcnetworks.py create mode 100644 youtube_dl/extractor/americastestkitchen.py create mode 100644 youtube_dl/extractor/amp.py create mode 100644 youtube_dl/extractor/animeondemand.py create mode 100644 youtube_dl/extractor/anvato.py create mode 100644 youtube_dl/extractor/aol.py create mode 100644 youtube_dl/extractor/apa.py create mode 100644 youtube_dl/extractor/aparat.py create mode 100644 youtube_dl/extractor/appleconnect.py create mode 100644 youtube_dl/extractor/appletrailers.py create mode 100644 youtube_dl/extractor/archiveorg.py create mode 100644 youtube_dl/extractor/ard.py 
create mode 100644 youtube_dl/extractor/arkena.py create mode 100644 youtube_dl/extractor/arte.py create mode 100644 youtube_dl/extractor/asiancrush.py create mode 100644 youtube_dl/extractor/atresplayer.py create mode 100644 youtube_dl/extractor/atttechchannel.py create mode 100644 youtube_dl/extractor/atvat.py create mode 100644 youtube_dl/extractor/audimedia.py create mode 100644 youtube_dl/extractor/audioboom.py create mode 100644 youtube_dl/extractor/audiomack.py create mode 100644 youtube_dl/extractor/awaan.py create mode 100644 youtube_dl/extractor/aws.py create mode 100644 youtube_dl/extractor/azmedien.py create mode 100644 youtube_dl/extractor/baidu.py create mode 100644 youtube_dl/extractor/bambuser.py create mode 100644 youtube_dl/extractor/bandcamp.py create mode 100644 youtube_dl/extractor/bbc.py create mode 100644 youtube_dl/extractor/beampro.py create mode 100644 youtube_dl/extractor/beatport.py create mode 100644 youtube_dl/extractor/beeg.py create mode 100644 youtube_dl/extractor/behindkink.py create mode 100644 youtube_dl/extractor/bellmedia.py create mode 100644 youtube_dl/extractor/bet.py create mode 100644 youtube_dl/extractor/bfi.py create mode 100644 youtube_dl/extractor/bigflix.py create mode 100644 youtube_dl/extractor/bild.py create mode 100644 youtube_dl/extractor/bilibili.py create mode 100644 youtube_dl/extractor/biobiochiletv.py create mode 100644 youtube_dl/extractor/biqle.py create mode 100644 youtube_dl/extractor/bitchute.py create mode 100644 youtube_dl/extractor/bleacherreport.py create mode 100644 youtube_dl/extractor/blinkx.py create mode 100644 youtube_dl/extractor/bloomberg.py create mode 100644 youtube_dl/extractor/bokecc.py create mode 100644 youtube_dl/extractor/bostonglobe.py create mode 100644 youtube_dl/extractor/bpb.py create mode 100644 youtube_dl/extractor/br.py create mode 100644 youtube_dl/extractor/bravotv.py create mode 100644 youtube_dl/extractor/breakcom.py create mode 100644 youtube_dl/extractor/brightcove.py create mode 100644 youtube_dl/extractor/businessinsider.py create mode 100644 youtube_dl/extractor/buzzfeed.py create mode 100644 youtube_dl/extractor/byutv.py create mode 100644 youtube_dl/extractor/c56.py create mode 100644 youtube_dl/extractor/camdemy.py create mode 100644 youtube_dl/extractor/cammodels.py create mode 100644 youtube_dl/extractor/camtube.py create mode 100644 youtube_dl/extractor/camwithher.py create mode 100644 youtube_dl/extractor/canalc2.py create mode 100644 youtube_dl/extractor/canalplus.py create mode 100644 youtube_dl/extractor/canvas.py create mode 100644 youtube_dl/extractor/carambatv.py create mode 100644 youtube_dl/extractor/cartoonnetwork.py create mode 100644 youtube_dl/extractor/cbc.py create mode 100644 youtube_dl/extractor/cbs.py create mode 100644 youtube_dl/extractor/cbsinteractive.py create mode 100644 youtube_dl/extractor/cbslocal.py create mode 100644 youtube_dl/extractor/cbsnews.py create mode 100644 youtube_dl/extractor/cbssports.py create mode 100644 youtube_dl/extractor/ccc.py create mode 100644 youtube_dl/extractor/ccma.py create mode 100644 youtube_dl/extractor/cctv.py create mode 100644 youtube_dl/extractor/cda.py create mode 100644 youtube_dl/extractor/ceskatelevize.py create mode 100644 youtube_dl/extractor/channel9.py create mode 100644 youtube_dl/extractor/charlierose.py create mode 100644 youtube_dl/extractor/chaturbate.py create mode 100644 youtube_dl/extractor/chilloutzone.py create mode 100644 youtube_dl/extractor/chirbit.py create mode 100644 youtube_dl/extractor/cinchcast.py 
create mode 100644 youtube_dl/extractor/cinemax.py create mode 100644 youtube_dl/extractor/ciscolive.py create mode 100644 youtube_dl/extractor/cjsw.py create mode 100644 youtube_dl/extractor/cliphunter.py create mode 100644 youtube_dl/extractor/clippit.py create mode 100644 youtube_dl/extractor/cliprs.py create mode 100644 youtube_dl/extractor/clipsyndicate.py create mode 100644 youtube_dl/extractor/closertotruth.py create mode 100644 youtube_dl/extractor/cloudflarestream.py create mode 100644 youtube_dl/extractor/cloudy.py create mode 100644 youtube_dl/extractor/clubic.py create mode 100644 youtube_dl/extractor/clyp.py create mode 100644 youtube_dl/extractor/cmt.py create mode 100644 youtube_dl/extractor/cnbc.py create mode 100644 youtube_dl/extractor/cnn.py create mode 100644 youtube_dl/extractor/comcarcoff.py create mode 100644 youtube_dl/extractor/comedycentral.py create mode 100644 youtube_dl/extractor/common.py create mode 100644 youtube_dl/extractor/commonmistakes.py create mode 100644 youtube_dl/extractor/commonprotocols.py create mode 100644 youtube_dl/extractor/condenast.py create mode 100644 youtube_dl/extractor/corus.py create mode 100644 youtube_dl/extractor/coub.py create mode 100644 youtube_dl/extractor/cracked.py create mode 100644 youtube_dl/extractor/crackle.py create mode 100644 youtube_dl/extractor/crooksandliars.py create mode 100644 youtube_dl/extractor/crunchyroll.py create mode 100644 youtube_dl/extractor/cspan.py create mode 100644 youtube_dl/extractor/ctsnews.py create mode 100644 youtube_dl/extractor/ctvnews.py create mode 100644 youtube_dl/extractor/cultureunplugged.py create mode 100644 youtube_dl/extractor/curiositystream.py create mode 100644 youtube_dl/extractor/cwtv.py create mode 100644 youtube_dl/extractor/dailymail.py create mode 100644 youtube_dl/extractor/dailymotion.py create mode 100644 youtube_dl/extractor/daisuki.py create mode 100644 youtube_dl/extractor/daum.py create mode 100644 youtube_dl/extractor/dbtv.py create mode 100644 youtube_dl/extractor/dctp.py create mode 100644 youtube_dl/extractor/deezer.py create mode 100644 youtube_dl/extractor/defense.py create mode 100644 youtube_dl/extractor/democracynow.py create mode 100644 youtube_dl/extractor/dfb.py create mode 100644 youtube_dl/extractor/dhm.py create mode 100644 youtube_dl/extractor/digg.py create mode 100644 youtube_dl/extractor/digiteka.py create mode 100644 youtube_dl/extractor/discovery.py create mode 100644 youtube_dl/extractor/discoverygo.py create mode 100644 youtube_dl/extractor/discoverynetworks.py create mode 100644 youtube_dl/extractor/discoveryvr.py create mode 100644 youtube_dl/extractor/disney.py create mode 100644 youtube_dl/extractor/dispeak.py create mode 100644 youtube_dl/extractor/dotsub.py create mode 100644 youtube_dl/extractor/douyutv.py create mode 100644 youtube_dl/extractor/dplay.py create mode 100644 youtube_dl/extractor/drbonanza.py create mode 100644 youtube_dl/extractor/dreisat.py create mode 100644 youtube_dl/extractor/dropbox.py create mode 100644 youtube_dl/extractor/drtuber.py create mode 100644 youtube_dl/extractor/drtv.py create mode 100644 youtube_dl/extractor/dtube.py create mode 100644 youtube_dl/extractor/dumpert.py create mode 100644 youtube_dl/extractor/dvtv.py create mode 100644 youtube_dl/extractor/dw.py create mode 100644 youtube_dl/extractor/eagleplatform.py create mode 100644 youtube_dl/extractor/ebaumsworld.py create mode 100644 youtube_dl/extractor/echomsk.py create mode 100644 youtube_dl/extractor/egghead.py create mode 100644 
youtube_dl/extractor/ehow.py create mode 100644 youtube_dl/extractor/eighttracks.py create mode 100644 youtube_dl/extractor/einthusan.py create mode 100644 youtube_dl/extractor/eitb.py create mode 100644 youtube_dl/extractor/ellentube.py create mode 100644 youtube_dl/extractor/elpais.py create mode 100644 youtube_dl/extractor/embedly.py create mode 100644 youtube_dl/extractor/engadget.py create mode 100644 youtube_dl/extractor/eporner.py create mode 100644 youtube_dl/extractor/eroprofile.py create mode 100644 youtube_dl/extractor/escapist.py create mode 100644 youtube_dl/extractor/espn.py create mode 100644 youtube_dl/extractor/esri.py create mode 100644 youtube_dl/extractor/europa.py create mode 100644 youtube_dl/extractor/everyonesmixtape.py create mode 100644 youtube_dl/extractor/expotv.py create mode 100644 youtube_dl/extractor/expressen.py create mode 100644 youtube_dl/extractor/extractors.py create mode 100644 youtube_dl/extractor/extremetube.py create mode 100644 youtube_dl/extractor/eyedotv.py create mode 100644 youtube_dl/extractor/facebook.py create mode 100644 youtube_dl/extractor/faz.py create mode 100644 youtube_dl/extractor/fc2.py create mode 100644 youtube_dl/extractor/fczenit.py create mode 100644 youtube_dl/extractor/filmon.py create mode 100644 youtube_dl/extractor/filmweb.py create mode 100644 youtube_dl/extractor/firsttv.py create mode 100644 youtube_dl/extractor/fivemin.py create mode 100644 youtube_dl/extractor/fivetv.py create mode 100644 youtube_dl/extractor/flickr.py create mode 100644 youtube_dl/extractor/flipagram.py create mode 100644 youtube_dl/extractor/folketinget.py create mode 100644 youtube_dl/extractor/footyroom.py create mode 100644 youtube_dl/extractor/formula1.py create mode 100644 youtube_dl/extractor/fourtube.py create mode 100644 youtube_dl/extractor/fox.py create mode 100644 youtube_dl/extractor/fox9.py create mode 100644 youtube_dl/extractor/foxgay.py create mode 100644 youtube_dl/extractor/foxnews.py create mode 100644 youtube_dl/extractor/foxsports.py create mode 100644 youtube_dl/extractor/franceculture.py create mode 100644 youtube_dl/extractor/franceinter.py create mode 100644 youtube_dl/extractor/francetv.py create mode 100644 youtube_dl/extractor/freesound.py create mode 100644 youtube_dl/extractor/freespeech.py create mode 100644 youtube_dl/extractor/freshlive.py create mode 100644 youtube_dl/extractor/frontendmasters.py create mode 100644 youtube_dl/extractor/funimation.py create mode 100644 youtube_dl/extractor/funk.py create mode 100644 youtube_dl/extractor/funnyordie.py create mode 100644 youtube_dl/extractor/fusion.py create mode 100644 youtube_dl/extractor/fxnetworks.py create mode 100644 youtube_dl/extractor/gaia.py create mode 100644 youtube_dl/extractor/gameinformer.py create mode 100644 youtube_dl/extractor/gameone.py create mode 100644 youtube_dl/extractor/gamespot.py create mode 100644 youtube_dl/extractor/gamestar.py create mode 100644 youtube_dl/extractor/gaskrank.py create mode 100644 youtube_dl/extractor/gazeta.py create mode 100644 youtube_dl/extractor/gdcvault.py create mode 100644 youtube_dl/extractor/generic.py create mode 100644 youtube_dl/extractor/gfycat.py create mode 100644 youtube_dl/extractor/giantbomb.py create mode 100644 youtube_dl/extractor/giga.py create mode 100644 youtube_dl/extractor/gigya.py create mode 100644 youtube_dl/extractor/glide.py create mode 100644 youtube_dl/extractor/globo.py create mode 100644 youtube_dl/extractor/go.py create mode 100644 youtube_dl/extractor/go90.py create mode 100644 
youtube_dl/extractor/godtube.py create mode 100644 youtube_dl/extractor/golem.py create mode 100644 youtube_dl/extractor/googledrive.py create mode 100644 youtube_dl/extractor/googleplus.py create mode 100644 youtube_dl/extractor/googlesearch.py create mode 100644 youtube_dl/extractor/goshgay.py create mode 100644 youtube_dl/extractor/gputechconf.py create mode 100644 youtube_dl/extractor/groupon.py create mode 100644 youtube_dl/extractor/hark.py create mode 100644 youtube_dl/extractor/hbo.py create mode 100644 youtube_dl/extractor/hearthisat.py create mode 100644 youtube_dl/extractor/heise.py create mode 100644 youtube_dl/extractor/hellporno.py create mode 100644 youtube_dl/extractor/helsinki.py create mode 100644 youtube_dl/extractor/hentaistigma.py create mode 100644 youtube_dl/extractor/hgtv.py create mode 100644 youtube_dl/extractor/hidive.py create mode 100644 youtube_dl/extractor/historicfilms.py create mode 100644 youtube_dl/extractor/hitbox.py create mode 100644 youtube_dl/extractor/hitrecord.py create mode 100644 youtube_dl/extractor/hketv.py create mode 100644 youtube_dl/extractor/hornbunny.py create mode 100644 youtube_dl/extractor/hotnewhiphop.py create mode 100644 youtube_dl/extractor/hotstar.py create mode 100644 youtube_dl/extractor/howcast.py create mode 100644 youtube_dl/extractor/howstuffworks.py create mode 100644 youtube_dl/extractor/hrti.py create mode 100644 youtube_dl/extractor/huajiao.py create mode 100644 youtube_dl/extractor/huffpost.py create mode 100644 youtube_dl/extractor/hungama.py create mode 100644 youtube_dl/extractor/hypem.py create mode 100644 youtube_dl/extractor/iconosquare.py create mode 100644 youtube_dl/extractor/ign.py create mode 100644 youtube_dl/extractor/imdb.py create mode 100644 youtube_dl/extractor/imgur.py create mode 100644 youtube_dl/extractor/ina.py create mode 100644 youtube_dl/extractor/inc.py create mode 100644 youtube_dl/extractor/indavideo.py create mode 100644 youtube_dl/extractor/infoq.py create mode 100644 youtube_dl/extractor/instagram.py create mode 100644 youtube_dl/extractor/internazionale.py create mode 100644 youtube_dl/extractor/internetvideoarchive.py create mode 100644 youtube_dl/extractor/iprima.py create mode 100644 youtube_dl/extractor/iqiyi.py create mode 100644 youtube_dl/extractor/ir90tv.py create mode 100644 youtube_dl/extractor/itv.py create mode 100644 youtube_dl/extractor/ivi.py create mode 100644 youtube_dl/extractor/ivideon.py create mode 100644 youtube_dl/extractor/iwara.py create mode 100644 youtube_dl/extractor/izlesene.py create mode 100644 youtube_dl/extractor/jamendo.py create mode 100644 youtube_dl/extractor/jeuxvideo.py create mode 100644 youtube_dl/extractor/joj.py create mode 100644 youtube_dl/extractor/jove.py create mode 100644 youtube_dl/extractor/jpopsukitv.py create mode 100644 youtube_dl/extractor/jwplatform.py create mode 100644 youtube_dl/extractor/kakao.py create mode 100644 youtube_dl/extractor/kaltura.py create mode 100644 youtube_dl/extractor/kanalplay.py create mode 100644 youtube_dl/extractor/kankan.py create mode 100644 youtube_dl/extractor/karaoketv.py create mode 100644 youtube_dl/extractor/karrierevideos.py create mode 100644 youtube_dl/extractor/keek.py create mode 100644 youtube_dl/extractor/keezmovies.py create mode 100644 youtube_dl/extractor/ketnet.py create mode 100644 youtube_dl/extractor/khanacademy.py create mode 100644 youtube_dl/extractor/kickstarter.py create mode 100644 youtube_dl/extractor/kinopoisk.py create mode 100644 youtube_dl/extractor/konserthusetplay.py 
create mode 100644 youtube_dl/extractor/kontrtube.py create mode 100644 youtube_dl/extractor/krasview.py create mode 100644 youtube_dl/extractor/ku6.py create mode 100644 youtube_dl/extractor/kusi.py create mode 100644 youtube_dl/extractor/kuwo.py create mode 100644 youtube_dl/extractor/la7.py create mode 100644 youtube_dl/extractor/laola1tv.py create mode 100644 youtube_dl/extractor/lci.py create mode 100644 youtube_dl/extractor/lcp.py create mode 100644 youtube_dl/extractor/learnr.py create mode 100644 youtube_dl/extractor/lecture2go.py create mode 100644 youtube_dl/extractor/lecturio.py create mode 100644 youtube_dl/extractor/leeco.py create mode 100644 youtube_dl/extractor/lego.py create mode 100644 youtube_dl/extractor/lemonde.py create mode 100644 youtube_dl/extractor/lenta.py create mode 100644 youtube_dl/extractor/libraryofcongress.py create mode 100644 youtube_dl/extractor/libsyn.py create mode 100644 youtube_dl/extractor/lifenews.py create mode 100644 youtube_dl/extractor/limelight.py create mode 100644 youtube_dl/extractor/line.py create mode 100644 youtube_dl/extractor/linkedin.py create mode 100644 youtube_dl/extractor/linuxacademy.py create mode 100644 youtube_dl/extractor/litv.py create mode 100644 youtube_dl/extractor/liveleak.py create mode 100644 youtube_dl/extractor/livestream.py create mode 100644 youtube_dl/extractor/lnkgo.py create mode 100644 youtube_dl/extractor/localnews8.py create mode 100644 youtube_dl/extractor/lovehomeporn.py create mode 100644 youtube_dl/extractor/lrt.py create mode 100644 youtube_dl/extractor/lynda.py create mode 100644 youtube_dl/extractor/m6.py create mode 100644 youtube_dl/extractor/macgamestore.py create mode 100644 youtube_dl/extractor/mailru.py create mode 100644 youtube_dl/extractor/makertv.py create mode 100644 youtube_dl/extractor/malltv.py create mode 100644 youtube_dl/extractor/mangomolo.py create mode 100644 youtube_dl/extractor/manyvids.py create mode 100644 youtube_dl/extractor/markiza.py create mode 100644 youtube_dl/extractor/massengeschmacktv.py create mode 100644 youtube_dl/extractor/matchtv.py create mode 100644 youtube_dl/extractor/mdr.py create mode 100644 youtube_dl/extractor/medialaan.py create mode 100644 youtube_dl/extractor/mediaset.py create mode 100644 youtube_dl/extractor/mediasite.py create mode 100644 youtube_dl/extractor/medici.py create mode 100644 youtube_dl/extractor/megaphone.py create mode 100644 youtube_dl/extractor/meipai.py create mode 100644 youtube_dl/extractor/melonvod.py create mode 100644 youtube_dl/extractor/meta.py create mode 100644 youtube_dl/extractor/metacafe.py create mode 100644 youtube_dl/extractor/metacritic.py create mode 100644 youtube_dl/extractor/mgoon.py create mode 100644 youtube_dl/extractor/mgtv.py create mode 100644 youtube_dl/extractor/miaopai.py create mode 100644 youtube_dl/extractor/microsoftvirtualacademy.py create mode 100644 youtube_dl/extractor/minhateca.py create mode 100644 youtube_dl/extractor/ministrygrid.py create mode 100644 youtube_dl/extractor/minoto.py create mode 100644 youtube_dl/extractor/miomio.py create mode 100644 youtube_dl/extractor/mit.py create mode 100644 youtube_dl/extractor/mitele.py create mode 100644 youtube_dl/extractor/mixcloud.py create mode 100644 youtube_dl/extractor/mlb.py create mode 100644 youtube_dl/extractor/mnet.py create mode 100644 youtube_dl/extractor/moevideo.py create mode 100644 youtube_dl/extractor/mofosex.py create mode 100644 youtube_dl/extractor/mojvideo.py create mode 100644 youtube_dl/extractor/morningstar.py create mode 
100644 youtube_dl/extractor/motherless.py create mode 100644 youtube_dl/extractor/motorsport.py create mode 100644 youtube_dl/extractor/movieclips.py create mode 100644 youtube_dl/extractor/moviezine.py create mode 100644 youtube_dl/extractor/movingimage.py create mode 100644 youtube_dl/extractor/msn.py create mode 100644 youtube_dl/extractor/mtv.py create mode 100644 youtube_dl/extractor/muenchentv.py create mode 100644 youtube_dl/extractor/musicplayon.py create mode 100644 youtube_dl/extractor/mwave.py create mode 100644 youtube_dl/extractor/mychannels.py create mode 100644 youtube_dl/extractor/myspace.py create mode 100644 youtube_dl/extractor/myspass.py create mode 100644 youtube_dl/extractor/myvi.py create mode 100644 youtube_dl/extractor/myvidster.py create mode 100644 youtube_dl/extractor/nationalgeographic.py create mode 100644 youtube_dl/extractor/naver.py create mode 100644 youtube_dl/extractor/nba.py create mode 100644 youtube_dl/extractor/nbc.py create mode 100644 youtube_dl/extractor/ndr.py create mode 100644 youtube_dl/extractor/ndtv.py create mode 100644 youtube_dl/extractor/nerdcubed.py create mode 100644 youtube_dl/extractor/neteasemusic.py create mode 100644 youtube_dl/extractor/netzkino.py create mode 100644 youtube_dl/extractor/newgrounds.py create mode 100644 youtube_dl/extractor/newstube.py create mode 100644 youtube_dl/extractor/nextmedia.py create mode 100644 youtube_dl/extractor/nexx.py create mode 100644 youtube_dl/extractor/nfl.py create mode 100644 youtube_dl/extractor/nhk.py create mode 100644 youtube_dl/extractor/nhl.py create mode 100644 youtube_dl/extractor/nick.py create mode 100644 youtube_dl/extractor/niconico.py create mode 100644 youtube_dl/extractor/ninecninemedia.py create mode 100644 youtube_dl/extractor/ninegag.py create mode 100644 youtube_dl/extractor/ninenow.py create mode 100644 youtube_dl/extractor/nintendo.py create mode 100644 youtube_dl/extractor/njpwworld.py create mode 100644 youtube_dl/extractor/nobelprize.py create mode 100644 youtube_dl/extractor/noco.py create mode 100644 youtube_dl/extractor/nonktube.py create mode 100644 youtube_dl/extractor/noovo.py create mode 100644 youtube_dl/extractor/normalboots.py create mode 100644 youtube_dl/extractor/nosvideo.py create mode 100644 youtube_dl/extractor/nova.py create mode 100644 youtube_dl/extractor/nowness.py create mode 100644 youtube_dl/extractor/noz.py create mode 100644 youtube_dl/extractor/npo.py create mode 100644 youtube_dl/extractor/npr.py create mode 100644 youtube_dl/extractor/nrk.py create mode 100644 youtube_dl/extractor/nrl.py create mode 100644 youtube_dl/extractor/ntvcojp.py create mode 100644 youtube_dl/extractor/ntvde.py create mode 100644 youtube_dl/extractor/ntvru.py create mode 100644 youtube_dl/extractor/nuevo.py create mode 100644 youtube_dl/extractor/nuvid.py create mode 100644 youtube_dl/extractor/nytimes.py create mode 100644 youtube_dl/extractor/nzz.py create mode 100644 youtube_dl/extractor/odatv.py create mode 100644 youtube_dl/extractor/odnoklassniki.py create mode 100644 youtube_dl/extractor/oktoberfesttv.py create mode 100644 youtube_dl/extractor/once.py create mode 100644 youtube_dl/extractor/ondemandkorea.py create mode 100644 youtube_dl/extractor/onet.py create mode 100644 youtube_dl/extractor/onionstudios.py create mode 100644 youtube_dl/extractor/ooyala.py create mode 100644 youtube_dl/extractor/openload.py create mode 100644 youtube_dl/extractor/ora.py create mode 100644 youtube_dl/extractor/orf.py create mode 100644 youtube_dl/extractor/outsidetv.py 
create mode 100644 youtube_dl/extractor/packtpub.py create mode 100644 youtube_dl/extractor/pandatv.py create mode 100644 youtube_dl/extractor/pandoratv.py create mode 100644 youtube_dl/extractor/parliamentliveuk.py create mode 100644 youtube_dl/extractor/patreon.py create mode 100644 youtube_dl/extractor/pbs.py create mode 100644 youtube_dl/extractor/pearvideo.py create mode 100644 youtube_dl/extractor/peertube.py create mode 100644 youtube_dl/extractor/people.py create mode 100644 youtube_dl/extractor/performgroup.py create mode 100644 youtube_dl/extractor/periscope.py create mode 100644 youtube_dl/extractor/philharmoniedeparis.py create mode 100644 youtube_dl/extractor/phoenix.py create mode 100644 youtube_dl/extractor/photobucket.py create mode 100644 youtube_dl/extractor/picarto.py create mode 100644 youtube_dl/extractor/piksel.py create mode 100644 youtube_dl/extractor/pinkbike.py create mode 100644 youtube_dl/extractor/pladform.py create mode 100644 youtube_dl/extractor/platzi.py create mode 100644 youtube_dl/extractor/playfm.py create mode 100644 youtube_dl/extractor/playplustv.py create mode 100644 youtube_dl/extractor/plays.py create mode 100644 youtube_dl/extractor/playtvak.py create mode 100644 youtube_dl/extractor/playvid.py create mode 100644 youtube_dl/extractor/playwire.py create mode 100644 youtube_dl/extractor/pluralsight.py create mode 100644 youtube_dl/extractor/podomatic.py create mode 100644 youtube_dl/extractor/pokemon.py create mode 100644 youtube_dl/extractor/polskieradio.py create mode 100644 youtube_dl/extractor/popcorntv.py create mode 100644 youtube_dl/extractor/porn91.py create mode 100644 youtube_dl/extractor/porncom.py create mode 100644 youtube_dl/extractor/pornhd.py create mode 100644 youtube_dl/extractor/pornhub.py create mode 100644 youtube_dl/extractor/pornotube.py create mode 100644 youtube_dl/extractor/pornovoisines.py create mode 100644 youtube_dl/extractor/pornoxo.py create mode 100644 youtube_dl/extractor/presstv.py create mode 100644 youtube_dl/extractor/promptfile.py create mode 100644 youtube_dl/extractor/prosiebensat1.py create mode 100644 youtube_dl/extractor/puhutv.py create mode 100644 youtube_dl/extractor/puls4.py create mode 100644 youtube_dl/extractor/pyvideo.py create mode 100644 youtube_dl/extractor/qqmusic.py create mode 100644 youtube_dl/extractor/r7.py create mode 100644 youtube_dl/extractor/radiobremen.py create mode 100644 youtube_dl/extractor/radiocanada.py create mode 100644 youtube_dl/extractor/radiode.py create mode 100644 youtube_dl/extractor/radiofrance.py create mode 100644 youtube_dl/extractor/radiojavan.py create mode 100644 youtube_dl/extractor/rai.py create mode 100644 youtube_dl/extractor/raywenderlich.py create mode 100644 youtube_dl/extractor/rbmaradio.py create mode 100644 youtube_dl/extractor/rds.py create mode 100644 youtube_dl/extractor/redbulltv.py create mode 100644 youtube_dl/extractor/reddit.py create mode 100644 youtube_dl/extractor/redtube.py create mode 100644 youtube_dl/extractor/regiotv.py create mode 100644 youtube_dl/extractor/rentv.py create mode 100644 youtube_dl/extractor/restudy.py create mode 100644 youtube_dl/extractor/reuters.py create mode 100644 youtube_dl/extractor/reverbnation.py create mode 100644 youtube_dl/extractor/revision3.py create mode 100644 youtube_dl/extractor/rice.py create mode 100644 youtube_dl/extractor/rmcdecouverte.py create mode 100644 youtube_dl/extractor/ro220.py create mode 100644 youtube_dl/extractor/rockstargames.py create mode 100644 
youtube_dl/extractor/roosterteeth.py create mode 100644 youtube_dl/extractor/rottentomatoes.py create mode 100644 youtube_dl/extractor/roxwel.py create mode 100644 youtube_dl/extractor/rozhlas.py create mode 100644 youtube_dl/extractor/rtbf.py create mode 100644 youtube_dl/extractor/rte.py create mode 100644 youtube_dl/extractor/rtl2.py create mode 100644 youtube_dl/extractor/rtlnl.py create mode 100644 youtube_dl/extractor/rtp.py create mode 100644 youtube_dl/extractor/rts.py create mode 100644 youtube_dl/extractor/rtve.py create mode 100644 youtube_dl/extractor/rtvnh.py create mode 100644 youtube_dl/extractor/rtvs.py create mode 100644 youtube_dl/extractor/rudo.py create mode 100644 youtube_dl/extractor/ruhd.py create mode 100644 youtube_dl/extractor/rutube.py create mode 100644 youtube_dl/extractor/rutv.py create mode 100644 youtube_dl/extractor/ruutu.py create mode 100644 youtube_dl/extractor/ruv.py create mode 100644 youtube_dl/extractor/safari.py create mode 100644 youtube_dl/extractor/sapo.py create mode 100644 youtube_dl/extractor/savefrom.py create mode 100644 youtube_dl/extractor/sbs.py create mode 100644 youtube_dl/extractor/screencast.py create mode 100644 youtube_dl/extractor/screencastomatic.py create mode 100644 youtube_dl/extractor/scrippsnetworks.py create mode 100644 youtube_dl/extractor/seeker.py create mode 100644 youtube_dl/extractor/senateisvp.py create mode 100644 youtube_dl/extractor/sendtonews.py create mode 100644 youtube_dl/extractor/servingsys.py create mode 100644 youtube_dl/extractor/servus.py create mode 100644 youtube_dl/extractor/sevenplus.py create mode 100644 youtube_dl/extractor/sexu.py create mode 100644 youtube_dl/extractor/seznamzpravy.py create mode 100644 youtube_dl/extractor/shahid.py create mode 100644 youtube_dl/extractor/shared.py create mode 100644 youtube_dl/extractor/showroomlive.py create mode 100644 youtube_dl/extractor/sina.py create mode 100644 youtube_dl/extractor/sixplay.py create mode 100644 youtube_dl/extractor/sky.py create mode 100644 youtube_dl/extractor/skylinewebcams.py create mode 100644 youtube_dl/extractor/skynewsarabia.py create mode 100644 youtube_dl/extractor/slideshare.py create mode 100644 youtube_dl/extractor/slideslive.py create mode 100644 youtube_dl/extractor/slutload.py create mode 100644 youtube_dl/extractor/smotri.py create mode 100644 youtube_dl/extractor/snotr.py create mode 100644 youtube_dl/extractor/sohu.py create mode 100644 youtube_dl/extractor/sonyliv.py create mode 100644 youtube_dl/extractor/soundcloud.py create mode 100644 youtube_dl/extractor/soundgasm.py create mode 100644 youtube_dl/extractor/southpark.py create mode 100644 youtube_dl/extractor/spankbang.py create mode 100644 youtube_dl/extractor/spankwire.py create mode 100644 youtube_dl/extractor/spiegel.py create mode 100644 youtube_dl/extractor/spiegeltv.py create mode 100644 youtube_dl/extractor/spike.py create mode 100644 youtube_dl/extractor/sport5.py create mode 100644 youtube_dl/extractor/sportbox.py create mode 100644 youtube_dl/extractor/sportdeutschland.py create mode 100644 youtube_dl/extractor/springboardplatform.py create mode 100644 youtube_dl/extractor/sprout.py create mode 100644 youtube_dl/extractor/srgssr.py create mode 100644 youtube_dl/extractor/srmediathek.py create mode 100644 youtube_dl/extractor/stanfordoc.py create mode 100644 youtube_dl/extractor/steam.py create mode 100644 youtube_dl/extractor/stitcher.py create mode 100644 youtube_dl/extractor/streamable.py create mode 100644 youtube_dl/extractor/streamango.py create mode 
100644 youtube_dl/extractor/streamcloud.py create mode 100644 youtube_dl/extractor/streamcz.py create mode 100644 youtube_dl/extractor/streetvoice.py create mode 100644 youtube_dl/extractor/stretchinternet.py create mode 100644 youtube_dl/extractor/stv.py create mode 100644 youtube_dl/extractor/sunporno.py create mode 100644 youtube_dl/extractor/sverigesradio.py create mode 100644 youtube_dl/extractor/svt.py create mode 100644 youtube_dl/extractor/swrmediathek.py create mode 100644 youtube_dl/extractor/syfy.py create mode 100644 youtube_dl/extractor/sztvhu.py create mode 100644 youtube_dl/extractor/tagesschau.py create mode 100644 youtube_dl/extractor/tass.py create mode 100644 youtube_dl/extractor/tastytrade.py create mode 100644 youtube_dl/extractor/tbs.py create mode 100644 youtube_dl/extractor/tdslifeway.py create mode 100644 youtube_dl/extractor/teachable.py create mode 100644 youtube_dl/extractor/teachertube.py create mode 100644 youtube_dl/extractor/teachingchannel.py create mode 100644 youtube_dl/extractor/teamcoco.py create mode 100644 youtube_dl/extractor/teamtreehouse.py create mode 100644 youtube_dl/extractor/techtalks.py create mode 100644 youtube_dl/extractor/ted.py create mode 100644 youtube_dl/extractor/tele13.py create mode 100644 youtube_dl/extractor/tele5.py create mode 100644 youtube_dl/extractor/telebruxelles.py create mode 100644 youtube_dl/extractor/telecinco.py create mode 100644 youtube_dl/extractor/telegraaf.py create mode 100644 youtube_dl/extractor/telemb.py create mode 100644 youtube_dl/extractor/telequebec.py create mode 100644 youtube_dl/extractor/teletask.py create mode 100644 youtube_dl/extractor/telewebion.py create mode 100644 youtube_dl/extractor/tennistv.py create mode 100644 youtube_dl/extractor/testurl.py create mode 100644 youtube_dl/extractor/tf1.py create mode 100644 youtube_dl/extractor/tfo.py create mode 100644 youtube_dl/extractor/theintercept.py create mode 100644 youtube_dl/extractor/theplatform.py create mode 100644 youtube_dl/extractor/thescene.py create mode 100644 youtube_dl/extractor/thestar.py create mode 100644 youtube_dl/extractor/thesun.py create mode 100644 youtube_dl/extractor/theweatherchannel.py create mode 100644 youtube_dl/extractor/thisamericanlife.py create mode 100644 youtube_dl/extractor/thisav.py create mode 100644 youtube_dl/extractor/thisoldhouse.py create mode 100644 youtube_dl/extractor/thisvid.py create mode 100644 youtube_dl/extractor/threeqsdn.py create mode 100644 youtube_dl/extractor/tiktok.py create mode 100644 youtube_dl/extractor/tinypic.py create mode 100644 youtube_dl/extractor/tmz.py create mode 100644 youtube_dl/extractor/tnaflix.py create mode 100644 youtube_dl/extractor/toggle.py create mode 100644 youtube_dl/extractor/tonline.py create mode 100644 youtube_dl/extractor/toongoggles.py create mode 100644 youtube_dl/extractor/toutv.py create mode 100644 youtube_dl/extractor/toypics.py create mode 100644 youtube_dl/extractor/traileraddict.py create mode 100644 youtube_dl/extractor/trilulilu.py create mode 100644 youtube_dl/extractor/trunews.py create mode 100644 youtube_dl/extractor/trutv.py create mode 100644 youtube_dl/extractor/tube8.py create mode 100644 youtube_dl/extractor/tubitv.py create mode 100644 youtube_dl/extractor/tudou.py create mode 100644 youtube_dl/extractor/tumblr.py create mode 100644 youtube_dl/extractor/tunein.py create mode 100644 youtube_dl/extractor/tunepk.py create mode 100644 youtube_dl/extractor/turbo.py create mode 100644 youtube_dl/extractor/turner.py create mode 100644 
youtube_dl/extractor/tutv.py create mode 100644 youtube_dl/extractor/tv2.py create mode 100644 youtube_dl/extractor/tv2hu.py create mode 100644 youtube_dl/extractor/tv4.py create mode 100644 youtube_dl/extractor/tv5mondeplus.py create mode 100644 youtube_dl/extractor/tva.py create mode 100644 youtube_dl/extractor/tvanouvelles.py create mode 100644 youtube_dl/extractor/tvc.py create mode 100644 youtube_dl/extractor/tvigle.py create mode 100644 youtube_dl/extractor/tvland.py create mode 100644 youtube_dl/extractor/tvn24.py create mode 100644 youtube_dl/extractor/tvnet.py create mode 100644 youtube_dl/extractor/tvnoe.py create mode 100644 youtube_dl/extractor/tvnow.py create mode 100644 youtube_dl/extractor/tvp.py create mode 100644 youtube_dl/extractor/tvplay.py create mode 100644 youtube_dl/extractor/tvplayer.py create mode 100644 youtube_dl/extractor/tweakers.py create mode 100644 youtube_dl/extractor/twentyfourvideo.py create mode 100644 youtube_dl/extractor/twentymin.py create mode 100644 youtube_dl/extractor/twentythreevideo.py create mode 100644 youtube_dl/extractor/twitcasting.py create mode 100644 youtube_dl/extractor/twitch.py create mode 100644 youtube_dl/extractor/twitter.py create mode 100644 youtube_dl/extractor/udemy.py create mode 100644 youtube_dl/extractor/udn.py create mode 100644 youtube_dl/extractor/ufctv.py create mode 100644 youtube_dl/extractor/uktvplay.py create mode 100644 youtube_dl/extractor/umg.py create mode 100644 youtube_dl/extractor/unistra.py create mode 100644 youtube_dl/extractor/unity.py create mode 100644 youtube_dl/extractor/uol.py create mode 100644 youtube_dl/extractor/uplynk.py create mode 100644 youtube_dl/extractor/urort.py create mode 100644 youtube_dl/extractor/urplay.py create mode 100644 youtube_dl/extractor/usanetwork.py create mode 100644 youtube_dl/extractor/usatoday.py create mode 100644 youtube_dl/extractor/ustream.py create mode 100644 youtube_dl/extractor/ustudio.py create mode 100644 youtube_dl/extractor/varzesh3.py create mode 100644 youtube_dl/extractor/vbox7.py create mode 100644 youtube_dl/extractor/veehd.py create mode 100644 youtube_dl/extractor/veoh.py create mode 100644 youtube_dl/extractor/vessel.py create mode 100644 youtube_dl/extractor/vesti.py create mode 100644 youtube_dl/extractor/vevo.py create mode 100644 youtube_dl/extractor/vgtv.py create mode 100644 youtube_dl/extractor/vh1.py create mode 100644 youtube_dl/extractor/vice.py create mode 100644 youtube_dl/extractor/vidbit.py create mode 100644 youtube_dl/extractor/viddler.py create mode 100644 youtube_dl/extractor/videa.py create mode 100644 youtube_dl/extractor/videodetective.py create mode 100644 youtube_dl/extractor/videofyme.py create mode 100644 youtube_dl/extractor/videomore.py create mode 100644 youtube_dl/extractor/videopremium.py create mode 100644 youtube_dl/extractor/videopress.py create mode 100644 youtube_dl/extractor/vidio.py create mode 100644 youtube_dl/extractor/vidlii.py create mode 100644 youtube_dl/extractor/vidme.py create mode 100644 youtube_dl/extractor/vidzi.py create mode 100644 youtube_dl/extractor/vier.py create mode 100644 youtube_dl/extractor/viewlift.py create mode 100644 youtube_dl/extractor/viewster.py create mode 100644 youtube_dl/extractor/viidea.py create mode 100644 youtube_dl/extractor/viki.py create mode 100644 youtube_dl/extractor/vimeo.py create mode 100644 youtube_dl/extractor/vimple.py create mode 100644 youtube_dl/extractor/vine.py create mode 100644 youtube_dl/extractor/viqeo.py create mode 100644 youtube_dl/extractor/viu.py 
create mode 100644 youtube_dl/extractor/vk.py create mode 100644 youtube_dl/extractor/vlive.py create mode 100644 youtube_dl/extractor/vodlocker.py create mode 100644 youtube_dl/extractor/vodpl.py create mode 100644 youtube_dl/extractor/vodplatform.py create mode 100644 youtube_dl/extractor/voicerepublic.py create mode 100644 youtube_dl/extractor/voot.py create mode 100644 youtube_dl/extractor/voxmedia.py create mode 100644 youtube_dl/extractor/vrak.py create mode 100644 youtube_dl/extractor/vrt.py create mode 100644 youtube_dl/extractor/vrv.py create mode 100644 youtube_dl/extractor/vshare.py create mode 100644 youtube_dl/extractor/vube.py create mode 100644 youtube_dl/extractor/vuclip.py create mode 100644 youtube_dl/extractor/vvvvid.py create mode 100644 youtube_dl/extractor/vyborymos.py create mode 100644 youtube_dl/extractor/vzaar.py create mode 100644 youtube_dl/extractor/wakanim.py create mode 100644 youtube_dl/extractor/walla.py create mode 100644 youtube_dl/extractor/washingtonpost.py create mode 100644 youtube_dl/extractor/wat.py create mode 100644 youtube_dl/extractor/watchbox.py create mode 100644 youtube_dl/extractor/watchindianporn.py create mode 100644 youtube_dl/extractor/wdr.py create mode 100644 youtube_dl/extractor/webcaster.py create mode 100644 youtube_dl/extractor/webofstories.py create mode 100644 youtube_dl/extractor/weibo.py create mode 100644 youtube_dl/extractor/weiqitv.py create mode 100644 youtube_dl/extractor/wimp.py create mode 100644 youtube_dl/extractor/wistia.py create mode 100644 youtube_dl/extractor/worldstarhiphop.py create mode 100644 youtube_dl/extractor/wsj.py create mode 100644 youtube_dl/extractor/wwe.py create mode 100644 youtube_dl/extractor/xbef.py create mode 100644 youtube_dl/extractor/xboxclips.py create mode 100644 youtube_dl/extractor/xfileshare.py create mode 100644 youtube_dl/extractor/xhamster.py create mode 100644 youtube_dl/extractor/xiami.py create mode 100644 youtube_dl/extractor/ximalaya.py create mode 100644 youtube_dl/extractor/xminus.py create mode 100644 youtube_dl/extractor/xnxx.py create mode 100644 youtube_dl/extractor/xstream.py create mode 100644 youtube_dl/extractor/xtube.py create mode 100644 youtube_dl/extractor/xuite.py create mode 100644 youtube_dl/extractor/xvideos.py create mode 100644 youtube_dl/extractor/xxxymovies.py create mode 100644 youtube_dl/extractor/yahoo.py create mode 100644 youtube_dl/extractor/yandexdisk.py create mode 100644 youtube_dl/extractor/yandexmusic.py create mode 100644 youtube_dl/extractor/yandexvideo.py create mode 100644 youtube_dl/extractor/yapfiles.py create mode 100644 youtube_dl/extractor/yesjapan.py create mode 100644 youtube_dl/extractor/yinyuetai.py create mode 100644 youtube_dl/extractor/ynet.py create mode 100644 youtube_dl/extractor/youjizz.py create mode 100644 youtube_dl/extractor/youku.py create mode 100644 youtube_dl/extractor/younow.py create mode 100644 youtube_dl/extractor/youporn.py create mode 100644 youtube_dl/extractor/yourporn.py create mode 100644 youtube_dl/extractor/yourupload.py create mode 100644 youtube_dl/extractor/youtube.py create mode 100644 youtube_dl/extractor/zapiks.py create mode 100644 youtube_dl/extractor/zaq1.py create mode 100644 youtube_dl/extractor/zattoo.py create mode 100644 youtube_dl/extractor/zdf.py create mode 100644 youtube_dl/extractor/zingmp3.py create mode 100644 youtube_dl/extractor/zype.py create mode 100644 youtube_dl/jsinterp.py create mode 100644 youtube_dl/options.py create mode 100644 youtube_dl/postprocessor/__init__.py create mode 
100644 youtube_dl/postprocessor/common.py create mode 100644 youtube_dl/postprocessor/embedthumbnail.py create mode 100644 youtube_dl/postprocessor/execafterdownload.py create mode 100644 youtube_dl/postprocessor/ffmpeg.py create mode 100644 youtube_dl/postprocessor/metadatafromtitle.py create mode 100644 youtube_dl/postprocessor/xattrpp.py create mode 100644 youtube_dl/socks.py create mode 100644 youtube_dl/swfinterp.py create mode 100644 youtube_dl/update.py create mode 100644 youtube_dl/utils.py create mode 100644 youtube_dl/version.py diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md new file mode 100644 index 000000000..fb0d33b8f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -0,0 +1,63 @@ +--- +name: Broken site support +about: Report broken or misfunctioning site +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a broken site support +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped +- [ ] I've searched the bugtracker for similar issues including closed ones + + +## Verbose log + + + +``` +PASTE VERBOSE LOG HERE +``` + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md new file mode 100644 index 000000000..3c95565a6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -0,0 +1,54 @@ +--- +name: Site support request +about: Request support for a new site +title: '' +labels: 'site-support-request' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a new site support request +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that none of provided URLs violate any copyrights +- [ ] I've searched the bugtracker for similar site support requests including closed ones + + +## Example URLs + + + +- Single video: https://www.youtube.com/watch?v=BaW_jenozKc +- Single video: https://youtu.be/BaW_jenozKc +- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md new file mode 100644 index 000000000..7410776d7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -0,0 +1,37 @@ +--- +name: Site feature request +about: Request a new functionality for a site +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a site feature request +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've searched the bugtracker for similar site feature requests including closed ones + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md new file mode 100644 index 000000000..cc52bcca6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -0,0 +1,65 @@ +--- +name: Bug report +about: Report a bug unrelated to any particular site or extractor +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a broken site support issue +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've checked that all provided URLs are alive and 
playable in a browser +- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped +- [ ] I've searched the bugtracker for similar bug reports including closed ones +- [ ] I've read bugs section in FAQ + + +## Verbose log + + + +``` +PASTE VERBOSE LOG HERE +``` + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md new file mode 100644 index 000000000..bbd421b1a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -0,0 +1,38 @@ +--- +name: Feature request +about: Request a new functionality unrelated to any particular site or extractor +title: '' +labels: 'request' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a feature request +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've searched the bugtracker for similar feature requests including closed ones + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md new file mode 100644 index 000000000..1fd7cd5dc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/6_question.md @@ -0,0 +1,38 @@ +--- +name: Ask question +about: Ask youtube-dl related question +title: '' +labels: 'question' +--- + + + + +## Checklist + + + +- [ ] I'm asking a question +- [ ] I've looked through the README and FAQ for similar questions +- [ ] I've searched the bugtracker for similar questions including closed ones + + +## Question + + + +WRITE QUESTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md new file mode 100644 index 000000000..c7600d5b5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md @@ -0,0 +1,63 @@ +--- +name: Broken site support +about: Report broken or misfunctioning site +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a broken site support +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped +- [ ] I've searched the bugtracker for similar issues including closed ones + + +## Verbose log + + + +``` +PASTE VERBOSE LOG HERE +``` + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md new file mode 100644 index 000000000..d4988e639 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md @@ -0,0 +1,54 @@ +--- +name: Site support request +about: Request support for a new site +title: '' +labels: 'site-support-request' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a new site support request +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that none of provided URLs violate any copyrights +- [ ] I've searched the bugtracker for similar site support requests including closed ones + + +## Example URLs + + + +- Single video: https://www.youtube.com/watch?v=BaW_jenozKc +- Single video: https://youtu.be/BaW_jenozKc +- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md 
b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md new file mode 100644 index 000000000..65f0a32f3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md @@ -0,0 +1,37 @@ +--- +name: Site feature request +about: Request a new functionality for a site +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a site feature request +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've searched the bugtracker for similar site feature requests including closed ones + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md new file mode 100644 index 000000000..41fb14b72 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md @@ -0,0 +1,65 @@ +--- +name: Bug report +about: Report a bug unrelated to any particular site or extractor +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a broken site support issue +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped +- [ ] I've searched the bugtracker for similar bug reports including closed ones +- [ ] I've read bugs section in FAQ + + +## Verbose log + + + +``` +PASTE VERBOSE LOG HERE +``` + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md new file mode 100644 index 000000000..b3431a7f0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md @@ -0,0 +1,38 @@ +--- +name: Feature request +about: Request a new functionality unrelated to any particular site or extractor +title: '' +labels: 'request' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a feature request +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've searched the bugtracker for similar feature requests including closed ones + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 000000000..e69b907d8 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,28 @@ +## Please follow the guide below + +- You will be asked some questions, please read them **carefully** and answer honestly +- Put an `x` into all the boxes [ ] relevant to your *pull request* (like that [x]) +- Use *Preview* tab to see how your *pull request* will actually look like + +--- + +### Before submitting a *pull request* make sure you have: +- [ ] At least skimmed through [adding new extractor tutorial](https://github.com/ytdl-org/youtube-dl#adding-support-for-a-new-site) and [youtube-dl coding conventions](https://github.com/ytdl-org/youtube-dl#youtube-dl-coding-conventions) sections +- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests +- [ ] Checked the code with [flake8](https://pypi.python.org/pypi/flake8) + +### In order to be accepted and merged into youtube-dl each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). 
Check one of the following options: +- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/) +- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence) + +### What is the purpose of your *pull request*? +- [ ] Bug fix +- [ ] Improvement +- [ ] New extractor +- [ ] New feature + +--- + +### Description of your *pull request* and other information + +Explanation of your *pull request* in arbitrary form goes here. Please make sure the description explains the purpose and effect of your *pull request* and is worded well enough to be understood. Provide as much context and examples as possible. diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..c4870a6ba --- /dev/null +++ b/.gitignore @@ -0,0 +1,53 @@ +*.pyc +*.pyo +*.class +*~ +*.DS_Store +wine-py2exe/ +py2exe.log +*.kate-swp +build/ +dist/ +MANIFEST +README.txt +youtube-dl.1 +youtube-dl.bash-completion +youtube-dl.fish +youtube_dl/extractor/lazy_extractors.py +youtube-dl +youtube-dl.exe +youtube-dl.tar.gz +.coverage +cover/ +updates_key.pem +*.egg-info +*.srt +*.ttml +*.sbv +*.vtt +*.flv +*.mp4 +*.m4a +*.m4v +*.mp3 +*.3gp +*.wav +*.ape +*.mkv +*.swf +*.part +*.ytdl +*.swp +test/local_parameters.json +.tox +youtube-dl.zsh + +# IntelliJ related files +.idea +*.iml + +tmp/ +venv/ + +# VS Code related files +.vscode diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..6d16c2955 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,39 @@ +language: python +python: + - "2.6" + - "2.7" + - "3.2" + - "3.3" + - "3.4" + - "3.5" + - "3.6" + - "pypy" + - "pypy3" +dist: trusty +env: + - YTDL_TEST_SET=core + - YTDL_TEST_SET=download +matrix: + include: + - python: 3.7 + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.7 + dist: xenial + env: YTDL_TEST_SET=download + - python: 3.8-dev + dist: xenial + env: YTDL_TEST_SET=core + - python: 3.8-dev + dist: xenial + env: YTDL_TEST_SET=download + - env: JYTHON=true; YTDL_TEST_SET=core + - env: JYTHON=true; YTDL_TEST_SET=download + fast_finish: true + allow_failures: + - env: YTDL_TEST_SET=download + - env: JYTHON=true; YTDL_TEST_SET=core + - env: JYTHON=true; YTDL_TEST_SET=download +before_install: + - if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi +script: ./devscripts/run_tests.sh diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 000000000..b507cb8df --- /dev/null +++ b/AUTHORS @@ -0,0 +1,248 @@ +Ricardo Garcia Gonzalez +Danny Colligan +Benjamin Johnson +Vasyl' Vavrychuk +Witold Baryluk +Paweł Paprota +Gergely Imreh +Rogério Brito +Philipp Hagemeister +Sören Schulze +Kevin Ngo +Ori Avtalion +shizeeg +Filippo Valsorda +Christian Albrecht +Dave Vasilevsky +Jaime Marquínez Ferrándiz +Jeff Crouse +Osama Khalid +Michael Walter +M. Yasoob Ullah Khalid +Julien Fraichard +Johny Mo Swag +Axel Noack +Albert Kim +Pierre Rudloff +Huarong Huo +Ismael Mejía +Steffan Donal +Andras Elso +Jelle van der Waa +Marcin Cieślak +Anton Larionov +Takuya Tsuchida +Sergey M. +Michael Orlitzky +Chris Gahan +Saimadhav Heblikar +Mike Col +Oleg Prutz +pulpe +Andreas Schmitz +Michael Kaiser +Niklas Laxström +David Triendl +Anthony Weems +David Wagner +Juan C. 
Olivares +Mattias Harrysson +phaer +Sainyam Kapoor +Nicolas Évrard +Jason Normore +Hoje Lee +Adam Thalhammer +Georg Jähnig +Ralf Haring +Koki Takahashi +Ariset Llerena +Adam Malcontenti-Wilson +Tobias Bell +Naglis Jonaitis +Charles Chen +Hassaan Ali +Dobrosław Żybort +David Fabijan +Sebastian Haas +Alexander Kirk +Erik Johnson +Keith Beckman +Ole Ernst +Aaron McDaniel (mcd1992) +Magnus Kolstad +Hari Padmanaban +Carlos Ramos +5moufl +lenaten +Dennis Scheiba +Damon Timm +winwon +Xavier Beynon +Gabriel Schubiner +xantares +Jan Matějka +Mauroy Sébastien +William Sewell +Dao Hoang Son +Oskar Jauch +Matthew Rayfield +t0mm0 +Tithen-Firion +Zack Fernandes +cryptonaut +Adrian Kretz +Mathias Rav +Petr Kutalek +Will Glynn +Max Reimann +Cédric Luthi +Thijs Vermeir +Joel Leclerc +Christopher Krooss +Ondřej Caletka +Dinesh S +Johan K. Jensen +Yen Chi Hsuan +Enam Mijbah Noor +David Luhmer +Shaya Goldberg +Paul Hartmann +Frans de Jonge +Robin de Rooij +Ryan Schmidt +Leslie P. Polzer +Duncan Keall +Alexander Mamay +Devin J. Pohly +Eduardo Ferro Aldama +Jeff Buchbinder +Amish Bhadeshia +Joram Schrijver +Will W. +Mohammad Teimori Pabandi +Roman Le Négrate +Matthias Küch +Julian Richen +Ping O. +Mister Hat +Peter Ding +jackyzy823 +George Brighton +Remita Amine +Aurélio A. Heckert +Bernhard Minks +sceext +Zach Bruggeman +Tjark Saul +slangangular +Behrouz Abbasi +ngld +nyuszika7h +Shaun Walbridge +Lee Jenkins +Anssi Hannula +Lukáš Lalinský +Qijiang Fan +Rémy Léone +Marco Ferragina +reiv +Muratcan Simsek +Evan Lu +flatgreen +Brian Foley +Vignesh Venkat +Tom Gijselinck +Founder Fang +Andrew Alexeyew +Saso Bezlaj +Erwin de Haan +Jens Wille +Robin Houtevelts +Patrick Griffis +Aidan Rowe +mutantmonkey +Ben Congdon +Kacper Michajłow +José Joaquín Atria +Viťas Strádal +Kagami Hiiragi +Philip Huppert +blahgeek +Kevin Deldycke +inondle +Tomáš Čech +Déstin Reed +Roman Tsiupa +Artur Krysiak +Jakub Adam Wieczorek +Aleksandar Topuzović +Nehal Patel +Rob van Bekkum +Petr Zvoníček +Pratyush Singh +Aleksander Nitecki +Sebastian Blunt +Matěj Cepl +Xie Yanbo +Philip Xu +John Hawkinson +Rich Leeper +Zhong Jianxin +Thor77 +Mattias Wadman +Arjan Verwer +Costy Petrisor +Logan B +Alex Seiler +Vijay Singh +Paul Hartmann +Stephen Chen +Fabian Stahl +Bagira +Odd Stråbø +Philip Herzog +Thomas Christlieb +Marek Rusinowski +Tobias Gruetzmacher +Olivier Bilodeau +Lars Vierbergen +Juanjo Benages +Xiao Di Guan +Thomas Winant +Daniel Twardowski +Jeremie Jarosh +Gerard Rovira +Marvin Ewald +Frédéric Bournival +Timendum +gritstub +Adam Voss +Mike Fährmann +Jan Kundrát +Giuseppe Fabiano +Örn Guðjónsson +Parmjit Virk +Genki Sky +Ľuboš Katrinec +Corey Nicholson +Ashutosh Chaudhary +John Dong +Tatsuyuki Ishi +Daniel Weber +Kay Bouché +Yang Hongbo +Lei Wang +Petr Novák +Leonardo Taccari +Martin Weinelt +Surya Oktafendri +TingPing +Alexandre Macabies +Bastian de Groot +Niklas Haas +András Veres-Szentkirályi +Enes Solak +Nathan Rossi +Thomas van der Berg +Luca Cherubin diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..cd9ccbe96 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,368 @@ +**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. 
It should look similar to this: +``` +$ youtube-dl -v +[debug] System config: [] +[debug] User config: [] +[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj'] +[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 +[debug] youtube-dl version 2015.12.06 +[debug] Git HEAD: 135392e +[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2 +[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 +[debug] Proxy map: {} +... +``` +**Do not post screenshots of verbose logs; only plain text is acceptable.** + +The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever. + +Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist): + +### Is the description of the issue itself sufficient? + +We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts. + +So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious + +- What the problem is +- How it could be fixed +- How your proposed solution would look like + +If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over. + +For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. + +If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/). + +**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL. + +### Are you using the latest version? + +Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well. + +### Is the issue already documented? + +Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. 
If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity. + +### Why are existing options not enough? + +Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem. + +### Is there enough context in your bug report? + +People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) to a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: One simple, and one impossible (or extremely complicated one). + +We are then presented with a very complicated request when the original problem could be solved far easier, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful. + +### Does the issue involve one problem, and one problem only? + +Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones. + +In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service. + +### Is anyone going to need the feature? + +Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them. + +### Is your question about youtube-dl? + +It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug. 
+ +# DEVELOPER INSTRUCTIONS + +Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution. + +To run youtube-dl as a developer, you don't need to build anything either. Simply execute + + python -m youtube_dl + +To run the test, simply invoke your favorite test runner, or execute a test file directly; any of the following work: + + python -m unittest discover + python test/test_download.py + nosetests + +See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. + +If you want to create a build of youtube-dl yourself, you'll need + +* python +* make (only GNU make is supported) +* pandoc +* zip +* nosetests + +### Adding support for a new site + +If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**. + +After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): + +1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork) +2. Check out the source code with: + + git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git + +3. Start a new git branch with + + cd youtube-dl + git checkout -b yourextractor + +4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: + + ```python + # coding: utf-8 + from __future__ import unicode_literals + + from .common import InfoExtractor + + + class YourExtractorIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' + _TEST = { + 'url': 'https://yourextractor.com/watch/42', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '42', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # TODO more code goes here, for example ... + title = self._html_search_regex(r'

(.+?)

', webpage, 'title') + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'uploader': self._search_regex(r']+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + # TODO more properties (see youtube_dl/extractor/common.py) + } + ``` +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). +6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. +8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): + + $ flake8 youtube_dl/extractor/yourextractor.py + +9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. +10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: + + $ git add youtube_dl/extractor/extractors.py + $ git add youtube_dl/extractor/yourextractor.py + $ git commit -m '[yourextractor] Add new extractor' + $ git push origin yourextractor + +11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. + +In any case, thank you very much for your contributions! + +## youtube-dl coding conventions + +This section introduces a guide lines for writing idiomatic, robust and future-proof extractor code. + +Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by emitting a new version of youtube-dl with a fix incorporated, all the previous versions become broken in all repositories and distros' packages that may not be so prompt in fetching the update from us. Needless to say, some non rolling release distros may never receive an update at all. 
+ +### Mandatory and optional metafields + +For extraction to work youtube-dl relies on metadata your extractor extracts and provides to youtube-dl expressed by an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: + + - `id` (media identifier) + - `title` (media title) + - `url` (media download URL) or `formats` + +In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data that the extraction does not make any sense without and if any of them fail to be extracted then the extractor is considered completely broken. + +[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones are considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. + +#### Example + +Say you have some source dictionary `meta` that you've fetched as JSON with HTTP request and it has a key `summary`: + +```python +meta = self._download_json(url, video_id) +``` + +Assume at this point `meta`'s layout is: + +```python +{ + ... + "summary": "some fancy summary text", + ... +} +``` + +Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so that you should extract it like: + +```python +description = meta.get('summary') # correct +``` + +and not like: + +```python +description = meta['summary'] # incorrect +``` + +The latter will break extraction process with `KeyError` if `summary` disappears from `meta` at some later time but with the former approach extraction will just go ahead with `description` set to `None` which is perfectly fine (remember `None` is equivalent to the absence of data). + +Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance: + +```python +description = self._search_regex( + r']+id="title"[^>]*>([^<]+)<', + webpage, 'description', fatal=False) +``` + +With `fatal` set to `False` if `_search_regex` fails to extract `description` it will emit a warning and continue extraction. + +You can also pass `default=`, for example: + +```python +description = self._search_regex( + r']+id="title"[^>]*>([^<]+)<', + webpage, 'description', default=None) +``` + +On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present. + +### Provide fallbacks + +When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable. 
+ +#### Example + +Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field, you should end up with something like: + +```python +title = meta['title'] +``` + +If `title` disappears from `meta` in the future due to some changes on the hoster's side, the extraction will fail, since `title` is mandatory. That's expected. + +Assume you have another source you can extract `title` from, for example the `og:title` HTML meta tag of the `webpage`. In this case you can provide a fallback scenario: + +```python +title = meta.get('title') or self._og_search_title(webpage) +``` + +This code will try to extract `title` from `meta` first, and if that fails, it will try extracting `og:title` from the `webpage`. + +### Regular expressions + +#### Don't capture groups you don't use + +A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be made non-capturing. + +##### Example + +Don't capture the `id` attribute name here, since you can't use it for anything anyway. + +Correct: + +```python +r'(?:id|ID)=(?P<id>\d+)' +``` + +Incorrect: +```python +r'(id|ID)=(?P<id>\d+)' +``` + + +#### Make regular expressions relaxed and flexible + +When using regular expressions, try to write them fuzzy, relaxed and flexible: skip insignificant parts that are more likely to change, allow both single and double quotes for quoted values, and so on. + +##### Example + +Say you need to extract `title` from the following HTML code: + +```html +<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span> +``` + +The code for that task should look similar to: + +```python +title = self._search_regex( + r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title') +``` + +Or even better: + +```python +title = self._search_regex( + r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)', + webpage, 'title', group='title') +``` + +Note how this tolerates potential changes in the `style` attribute's value or a switch from double quotes to single quotes for the `class` attribute. + +The code definitely should not look like: + +```python +title = self._search_regex( + r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>', + webpage, 'title', group='title') +``` + +### Long lines policy + +There is a soft limit to keep lines of code under 80 characters long. It should be respected if possible, but not when doing so makes readability and code maintenance worse. + +For example, you should **never** split long string literals like URLs or other often-copied entities over multiple lines to fit this limit: + +Correct: + +```python +'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' +``` + +Incorrect: + +```python +'https://www.youtube.com/watch?v=FqZTN594JQw&list=' +'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4' +``` + +### Use convenience conversion and parsing functions + +Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string-to-number conversions as well. + +Use `url_or_none` for safe URL processing. + +Use `try_get` for safe metadata extraction from parsed JSON.
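+ +As a rough sketch of how these helpers behave (the `data` dict and its values are made up for illustration; inside an extractor module the same names are imported from `..utils`): + +```python +from youtube_dl.utils import float_or_none, int_or_none, url_or_none + +data = {'durationMs': '123450', 'views': 'n/a', 'thumb': 'not-a-url'} + +# float_or_none converts the string and rescales milliseconds to seconds +duration = float_or_none(data.get('durationMs'), scale=1000) # 123.45 +# int_or_none swallows the ValueError that int('n/a') would raise +view_count = int_or_none(data.get('views')) # None +# url_or_none drops values that do not look like supported URLs +thumbnail = url_or_none(data.get('thumb')) # None +```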
+ +Use the dedicated parsing functions for the remaining meta fields: + + - `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction + - `unified_timestamp` for uniform `timestamp` extraction + - `parse_filesize` for `filesize` extraction + - `parse_count` for count meta fields extraction + - `parse_resolution` for `width` and `height` extraction + - `parse_duration` for `duration` extraction + - `parse_age_limit` for `age_limit` extraction + +Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions. + +#### More examples + +##### Safely extract optional description from parsed JSON +```python +description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str) +``` + +##### Safely extract more optional metadata +```python +video = try_get(response, lambda x: x['result']['video'][0], dict) or {} +description = video.get('summary') +duration = float_or_none(video.get('durationMs'), scale=1000) +view_count = int_or_none(video.get('views')) +``` + diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 000000000..5ce78b07a --- /dev/null +++ b/ChangeLog @@ -0,0 +1,4470 @@ +version 2019.07.02 + +Core ++ [utils] Introduce random_user_agent and use as default User-Agent (#21546) + +Extractors ++ [vevo] Add support for embed.vevo.com URLs (#21565) ++ [openload] Add support for oload.biz (#21574) +* [xiami] Update API base URL (#21575) +* [yourporn] Fix extraction (#21585) ++ [acast] Add support for URLs with episode id (#21444) ++ [dailymotion] Add support for DM.player embeds +* [soundcloud] Update client id + + +version 2019.06.27 + +Extractors ++ [go] Add support for disneynow.com (#21528) +* [mixer:vod] Relax URL regular expression (#21531, #21536) +* [drtv] Relax URL regular expression +* [fusion] Fix extraction (#17775, #21269) +- [nfb] Remove extractor (#21518) ++ [beeg] Add support for api/v6 v2 URLs (#21511) ++ [brightcove:new] Add support for playlists (#21331) ++ [openload] Add support for oload.life (#21495) +* [vimeo:channel,group] Make title extraction non fatal +* [vimeo:likes] Implement extractor in terms of channel extractor (#21493) ++ [pornhub] Add support for more paged video sources ++ [pornhub] Add support for downloading single pages and search pages (#15570) +* [pornhub] Rework extractors (#11922, #16078, #17454, #17936) ++ [youtube] Add another signature function pattern +* [tf1] Fix extraction (#21365, #21372) +* [crunchyroll] Move Accept-Language workaround to video extractor since + it causes playlists not to list any videos +* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443) + + +version 2019.06.21 + +Core +* [utils] Restrict parse_codecs and add theora as known vcodec (#21381) + +Extractors +* [youtube] Update signature function patterns (#21469, #21476) +* [youtube] Make --write-annotations non fatal (#21452) ++ [sixplay] Add support for rtlmost.hu (#21405) +* [youtube] Hardcode codec metadata for av01 video only formats (#21381) +* [toutv] Update client key (#21370) ++ [biqle] Add support for new embed domain +* [cbs] Improve DRM protected videos detection (#21339) + + +version 2019.06.08 + +Core +* [downloader/common] Improve rate limit (#21301) +* [utils] Improve strip_or_none +* [extractor/common] Strip src attribute for HTML5 entries code (#18485, + #21169) + +Extractors +* [ted] Fix playlist extraction (#20844, #21032) +* [vlive:playlist] Fix video extraction when no playlist is found (#20590) ++ [vlive] Add CH+ support (#16887, #21209) ++ [openload] Add support for oload.website (#21329) ++ [tvnow] Extract HD formats
(#21201) ++ [redbulltv] Add support for rrn:content URLs (#21297) +* [youtube] Fix average rating extraction (#21304) ++ [bitchute] Extract HTML5 formats (#21306) +* [cbsnews] Fix extraction (#9659, #15397) +* [vvvvid] Relax URL regular expression (#21299) ++ [prosiebensat1] Add support for new API (#21272) ++ [vrv] Extract adaptive_hls formats (#21243) +* [viki] Switch to HTTPS (#21001) +* [LiveLeak] Check if the original videos exist (#21206, #21208) +* [rtp] Fix extraction (#15099) +* [youtube] Improve DRM protected videos detection (#1774) ++ [srgssrplay] Add support for popupvideoplayer URLs (#21155) ++ [24video] Add support for porno.24video.net (#21194) ++ [24video] Add support for 24video.site (#21193) +- [pornflip] Remove extractor +- [criterion] Remove extractor (#21195) +* [pornhub] Use HTTPS (#21061) +* [bitchute] Fix uploader extraction (#21076) +* [streamcloud] Reduce waiting time to 6 seconds (#21092) +- [novamov] Remove extractors (#21077) ++ [openload] Add support for oload.press (#21135) +* [vivo] Fix extraction (#18906, #19217) + + +version 2019.05.20 + +Core ++ [extractor/common] Move workaround for applying first Set-Cookie header + into a separate _apply_first_set_cookie_header method + +Extractors +* [safari] Fix authentication (#21090) +* [vk] Use _apply_first_set_cookie_header +* [vrt] Fix extraction (#20527) ++ [canvas] Add support for vrtnieuws and sporza site ids and extract + AES HLS formats ++ [vrv] Extract captions (#19238) +* [tele5] Improve video id extraction +* [tele5] Relax URL regular expression (#21020, #21063) +* [svtplay] Update API URL (#21075) ++ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071) + + +version 2019.05.11 + +Core +* [utils] Transliterate "þ" as "th" (#20897) + +Extractors ++ [cloudflarestream] Add support for videodelivery.net (#21049) ++ [byutv] Add support for DVR videos (#20574, #20676) ++ [gfycat] Add support for URLs with tags (#20696, #20731) ++ [openload] Add support for verystream.com (#20701, #20967) +* [youtube] Use sp field value for signature field name (#18841, #18927, + #21028) ++ [yahoo:gyao] Extend URL regular expression (#21008) +* [youtube] Fix channel id extraction (#20982, #21003) ++ [sky] Add support for news.sky.com (#13055) ++ [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965) ++ [francetvinfo] Extend video id extraction (#20619, #20740) +* [4tube] Update token hosts (#20918) +* [hotstar] Move to API v2 (#20931) +* [fox] Fix API error handling under python 2 (#20925) ++ [redbulltv] Extend URL regular expression (#20922) + + +version 2019.04.30 + +Extractors +* [openload] Use real Chrome versions (#20902) +- [youtube] Remove info el for get_video_info request +* [youtube] Improve extraction robustness +- [dramafever] Remove extractor (#20868) +* [adn] Fix subtitle extraction (#12724) ++ [ccc] Extract creator (#20355) ++ [ccc:playlist] Add support for media.ccc.de playlists (#14601, #20355) ++ [sverigesradio] Add support for sverigesradio.se (#18635) ++ [cinemax] Add support for cinemax.com +* [sixplay] Try extracting non-DRM protected manifests (#20849) ++ [youtube] Extract Youtube Music Auto-generated metadata (#20599, #20742) +- [wrzuta] Remove extractor (#20684, #20801) +* [twitch] Prefer source format (#20850) ++ [twitcasting] Add support for private videos (#20843) +* [reddit] Validate thumbnail URL (#20030) +* [yandexmusic] Fix track URL extraction (#20820) + + +version 2019.04.24 + +Extractors +* [youtube] Fix extraction (#20758, #20759, #20761, #20762, #20764, #20766, + #20767, 
#20769, #20771, #20768, #20770) +* [toutv] Fix extraction and extract series info (#20757) ++ [vrv] Add support for movie listings (#19229) ++ [youtube] Print error when no data is available (#20737) ++ [soundcloud] Add support for new rendition and improve extraction (#20699) ++ [ooyala] Add support for geo verification proxy ++ [nrl] Add support for nrl.com (#15991) ++ [vimeo] Extract live archive source format (#19144) ++ [vimeo] Add support for live streams and improve info extraction (#19144) ++ [ntvcojp] Add support for cu.ntv.co.jp ++ [nhk] Extract RTMPT format ++ [nhk] Add support for audio URLs ++ [udemy] Add another course id extraction pattern (#20491) ++ [openload] Add support for oload.services (#20691) ++ [openload] Add support for openloed.co (#20691, #20693) +* [bravotv] Fix extraction (#19213) + + +version 2019.04.17 + +Extractors +* [openload] Randomize User-Agent (closes #20688) ++ [openload] Add support for oladblock domains (#20471) +* [adn] Fix subtitle extraction (#12724) ++ [aol] Add support for localized websites ++ [yahoo] Add support for GYAO episode URLs ++ [yahoo] Add support for streaming.yahoo.co.jp (#5811, #7098) ++ [yahoo] Add support for gyao.yahoo.co.jp +* [aenetworks] Fix history topic extraction and extract more formats ++ [cbs] Extract smpte and vtt subtitles ++ [streamango] Add support for streamcherry.com (#20592) ++ [yourporn] Add support for sxyprn.com (#20646) +* [mgtv] Fix extraction (#20650) +* [linkedin:learning] Use urljoin for form action URL (#20431) ++ [gdc] Add support for kaltura embeds (#20575) +* [dispeak] Improve mp4 bitrate extraction +* [kaltura] Sanitize embed URLs +* [jwplatform] Do not match manifest URLs (#20596) +* [aol] Restrict URL regular expression and improve format extraction ++ [tiktok] Add support for new URL schema (#20573) ++ [stv:player] Add support for player.stv.tv (#20586) + + +version 2019.04.07 + +Core ++ [downloader/external] Pass rtmp_conn to ffmpeg + +Extractors ++ [ruutu] Add support for audio podcasts (#20473, #20545) ++ [xvideos] Extract all thumbnails (#20432) ++ [platzi] Add support for platzi.com (#20562) +* [dvtv] Fix extraction (#18514, #19174) ++ [vrv] Add basic support for individual movie links (#19229) ++ [bfi:player] Add support for player.bfi.org.uk (#19235) +* [hbo] Fix extraction and extract subtitles (#14629, #13709) +* [youtube] Extract srv[1-3] subtitle formats (#20566) +* [adultswim] Fix extraction (#18025) +* [teamcoco] Fix extraction and add support for subdomains (#17099, #20339) +* [adn] Fix subtitle compatibility with ffmpeg +* [adn] Fix extraction and add support for positioning styles (#20549) +* [vk] Use unique video id (#17848) +* [newstube] Fix extraction +* [rtl2] Actualize extraction ++ [adobeconnect] Add support for adobeconnect.com (#20283) ++ [gaia] Add support for authentication (#14605) ++ [mediasite] Add support for dashed ids and named catalogs (#20531) + + +version 2019.04.01 + +Core +* [utils] Improve int_or_none and float_or_none (#20403) +* Check for valid --min-sleep-interval when --max-sleep-interval is specified + (#20435) + +Extractors ++ [weibo] Extend URL regular expression (#20496) ++ [xhamster] Add support for xhamster.one (#20508) ++ [mediasite] Add support for catalogs (#20507) ++ [teamtreehouse] Add support for teamtreehouse.com (#9836) ++ [ina] Add support for audio URLs +* [ina] Improve extraction +* [cwtv] Fix episode number extraction (#20461) +* [npo] Improve DRM detection ++ [pornhub] Add support for DASH formats (#20403) +* [svtplay] Update API endpoint
(#20430) + + +version 2019.03.18 + +Core +* [extractor/common] Improve HTML5 entries extraction ++ [utils] Introduce parse_bitrate +* [update] Hide update URLs behind redirect +* [extractor/common] Fix url meta field for unfragmented DASH formats (#20346) + +Extractors ++ [yandexvideo] Add extractor +* [openload] Improve embed detection ++ [corus] Add support for bigbrothercanada.ca (#20357) ++ [orf:radio] Extract series (#20012) ++ [cbc:watch] Add support for gem.cbc.ca (#20251, #20359) +- [anysex] Remove extractor (#19279) ++ [ciscolive] Add support for new URL schema (#20320, #20351) ++ [youtube] Add support for invidiou.sh (#20309) +- [anitube] Remove extractor (#20334) +- [ruleporn] Remove extractor (#15344, #20324) +* [npr] Fix extraction (#10793, #13440) +* [biqle] Fix extraction (#11471, #15313) +* [viddler] Modernize +* [moevideo] Fix extraction +* [primesharetv] Remove extractor +* [hypem] Modernize and extract more metadata (#15320) +* [veoh] Fix extraction +* [escapist] Modernize +- [videomega] Remove extractor (#10108) ++ [beeg] Add support for beeg.porn (#20306) +* [vimeo:review] Improve config url extraction and extract original format + (#20305) +* [fox] Detect geo restriction and authentication errors (#20208) + + +version 2019.03.09 + +Core +* [extractor/common] Use compat_etree_Element ++ [compat] Introduce compat_etree_Element +* [extractor/common] Fallback url to base URL for DASH formats +* [extractor/common] Do not fail on invalid data while parsing F4M manifest + in non fatal mode +* [extractor/common] Return MPD manifest as format's url meta field (#20242) +* [utils] Strip #HttpOnly_ prefix from cookies files (#20219) + +Extractors +* [francetv:site] Relax video id regular expression (#20268) +* [toutv] Detect invalid login error +* [toutv] Fix authentication (#20261) ++ [urplay] Extract timestamp (#20235) ++ [openload] Add support for oload.space (#20246) +* [facebook] Improve uploader extraction (#20250) +* [bbc] Use compat_etree_Element +* [crunchyroll] Use compat_etree_Element +* [npo] Improve ISM extraction +* [rai] Improve extraction (#20253) +* [paramountnetwork] Fix mgid extraction (#20241) +* [libsyn] Improve extraction (#20229) ++ [youtube] Add more invidious instances to URL regular expression (#20228) +* [spankbang] Fix extraction (#20023) +* [espn] Extend URL regular expression (#20013) +* [sixplay] Handle videos with empty assets (#20016) ++ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070) + + +version 2019.03.01 + +Core ++ [downloader/external] Add support for rate limit and retries for wget +* [downloader/external] Fix infinite retries for curl (#19303) + +Extractors +* [npo] Fix extraction (#20084) +* [francetv:site] Extend video id regex (#20029, #20071) ++ [periscope] Extract width and height (#20015) +* [servus] Fix extraction (#19297) +* [bbccouk] Make subtitles non fatal (#19651) +* [metacafe] Fix family filter bypass (#19287) + + +version 2019.02.18 + +Extractors +* [tvp:website] Fix and improve extraction ++ [tvp] Detect unavailable videos +* [tvp] Fix description extraction and make thumbnail optional ++ [linuxacademy] Add support for linuxacademy.com (#12207) +* [bilibili] Update keys (#19233) +* [udemy] Extend URL regular expressions (#14330, #15883) +* [udemy] Update User-Agent and detect captcha (#14713, #15839, #18126) +* [noovo] Fix extraction (#19230) +* [rai] Relax URL regular expression (#19232) ++ [vshare] Pass Referer to download request (#19205, #19221) ++ [openload] Add support for oload.live (#19222) +* 
[imgur] Use video id as title fallback (#18590) ++ [twitch] Add new source format detection approach (#19193) +* [tvplayhome] Fix video id extraction (#19190) +* [tvplayhome] Fix episode metadata extraction (#19190) +* [rutube:embed] Fix extraction (#19163) ++ [rutube:embed] Add support for private videos (#19163) ++ [soundcloud] Extract more metadata ++ [trunews] Add support for trunews.com (#19153) ++ [linkedin:learning] Extract chapter_number and chapter_id (#19162) + + +version 2019.02.08 + +Core +* [utils] Improve JSON-LD regular expression (#18058) +* [YoutubeDL] Fallback to ie_key of matching extractor while making + download archive id when no explicit ie_key is provided (#19022) + +Extractors ++ [malltv] Add support for mall.tv (#18058, #17856) ++ [spankbang:playlist] Add support for playlists (#19145) +* [spankbang] Extend URL regular expression +* [trutv] Fix extraction (#17336) +* [toutv] Fix authentication (#16398, #18700) +* [pornhub] Fix tags and categories extraction (#13720, #19135) +* [pornhd] Fix formats extraction ++ [pornhd] Extract like count (#19123, #19125) +* [radiocanada] Switch to the new media requests (#19115) ++ [teachable] Add support for courses.workitdaily.com (#18871) +- [vporn] Remove extractor (#16276) ++ [soundcloud:pagedplaylist] Add ie and title to entries (#19022, #19086) ++ [drtuber] Extract duration (#19078) +* [soundcloud] Fix paged playlists extraction, add support for albums and update client id +* [soundcloud] Update client id +* [drtv] Improve preference (#19079) ++ [openload] Add support for openload.pw and oload.pw (#18930) ++ [openload] Add support for oload.info (#19073) +* [crackle] Authorize media detail request (#16931) + + +version 2019.01.30.1 + +Core +* [postprocessor/ffmpeg] Fix avconv processing broken in #19025 (#19067) + + +version 2019.01.30 + +Core +* [postprocessor/ffmpeg] Do not copy Apple TV chapter tracks while embedding + subtitles (#19024, #19042) +* [postprocessor/ffmpeg] Disable "Last message repeated" messages (#19025) + +Extractors +* [yourporn] Fix extraction and extract duration (#18815, #18852, #19061) +* [drtv] Improve extraction (#19039) + + Add support for EncryptedUri videos + + Extract more metadata + * Fix subtitles extraction ++ [fox] Add support for locked videos using cookies (#19060) +* [fox] Fix extraction for free videos (#19060) ++ [zattoo] Add support for tv.salt.ch (#19059) + + +version 2019.01.27 + +Core ++ [extractor/common] Extract season in _json_ld +* [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection + (#681) + +Extractors +* [vice] Fix extraction for locked videos (#16248) ++ [wakanim] Detect DRM protected videos ++ [wakanim] Add support for wakanim.tv (#14374) +* [usatoday] Fix extraction for videos with custom brightcove partner id + (#18990) +* [drtv] Fix extraction (#18989) +* [nhk] Extend URL regular expression (#18968) +* [go] Fix Adobe Pass requests for Disney Now (#18901) ++ [openload] Add support for oload.club (#18969) + + +version 2019.01.24 + +Core +* [YoutubeDL] Fix negation for string operators in format selection (#18961) + + +version 2019.01.23 + +Core +* [utils] Fix urljoin for paths with non-http(s) schemes +* [extractor/common] Improve jwplayer relative URL handling (#18892) ++ [YoutubeDL] Add negation support for string comparisons in format selection + expressions (#18600, #18805) +* [extractor/common] Improve HLS video-only format detection (#18923) + +Extractors +* [crunchyroll] Extend URL regular expression (#18955) +* [pornhub] Bypass scrape
detection (#4822, #5930, #7074, #10175, #12722, + #17197, #18338, #18842, #18899) ++ [vrv] Add support for authentication (#14307) +* [videomore:season] Fix extraction +* [videomore] Improve extraction (#18908) ++ [tnaflix] Pass Referer in metadata request (#18925) +* [radiocanada] Relax DRM check (#18608, #18609) +* [vimeo] Fix video password verification for videos protected by + Referer HTTP header ++ [hketv] Add support for hkedcity.net (#18696) ++ [streamango] Add support for fruithosts.net (#18710) ++ [instagram] Add support for tags (#18757) ++ [odnoklassniki] Detect paid videos (#18876) +* [ted] Correct acodec for HTTP formats (#18923) +* [cartoonnetwork] Fix extraction (#15664, #17224) +* [vimeo] Fix extraction for password protected player URLs (#18889) + + +version 2019.01.17 + +Extractors +* [youtube] Extend JS player signature function name regular expressions + (#18890, #18891, #18893) + + +version 2019.01.16 + +Core ++ [test/helper] Add support for maxcount and count collection len checkers +* [downloader/hls] Fix uplynk ad skipping (#18824) +* [postprocessor/ffmpeg] Improve ffmpeg version parsing (#18813) + +Extractors +* [youtube] Skip unsupported adaptive stream type (#18804) ++ [youtube] Extract DASH formats from player response (#18804) +* [funimation] Fix extraction (#14089) +* [skylinewebcams] Fix extraction (#18853) ++ [curiositystream] Add support for non app URLs ++ [bitchute] Check formats (#18833) +* [wistia] Extend URL regular expression (#18823) ++ [playplustv] Add support for playplus.com (#18789) + + +version 2019.01.10 + +Core +* [extractor/common] Use episode name as title in _json_ld ++ [extractor/common] Add support for movies in _json_ld +* [postprocessor/ffmpeg] Embed subtitles with non-standard language codes + (#18765) ++ [utils] Add language codes replaced in 1989 revision of ISO 639 + to ISO639Utils (#18765) + +Extractors +* [youtube] Extract live HLS URL from player response (#18799) ++ [outsidetv] Add support for outsidetv.com (#18774) +* [jwplatform] Use JW Platform Delivery API V2 and add support for more URLs ++ [fox] Add support for National Geographic (#17985, #15333, #14698) ++ [playplustv] Add support for playplus.tv (#18789) +* [globo] Set GLBID cookie manually (#17346) ++ [gaia] Add support for gaia.com (#14605) +* [youporn] Fix title and description extraction (#18748) ++ [hungama] Add support for hungama.com (#17402, #18771) +* [dtube] Fix extraction (#18741) +* [tvnow] Fix and rework extractors and prepare for a switch to the new API + (#17245, #18499) +* [carambatv:page] Fix extraction (#18739) + + +version 2019.01.02 + +Extractors +* [discovery] Use geo verification headers (#17838) ++ [packtpub] Add support for subscription.packtpub.com (#18718) +* [yourporn] Fix extraction (#18583) ++ [acast:channel] Add support for play.acast.com (#18587) ++ [extractors] Add missing age limits (#18621) ++ [rmcdecouverte] Add support for live stream +* [rmcdecouverte] Bypass geo restriction +* [rmcdecouverte] Update URL regular expression (#18595, #18697) +* [manyvids] Fix extraction (#18604, #18614) +* [bitchute] Fix extraction (#18567) + + +version 2018.12.31 + +Extractors ++ [bbc] Add support for another embed pattern (#18643) ++ [npo:live] Add support for npostart.nl (#18644) +* [beeg] Fix extraction (#18610, #18626) +* [youtube] Unescape HTML for series (#18641) ++ [youtube] Extract more format metadata +* [youtube] Detect DRM protected videos (#1774) +* [youtube] Relax HTML5 player regular expressions (#18465, #18466) +* [youtube] Extend HTML5
player regular expression (#17516) ++ [liveleak] Add support for another embed type and restore original + format extraction ++ [crackle] Extract ISM and HTTP formats ++ [twitter] Pass Referer with card request (#18579) +* [mediasite] Extend URL regular expression (#18558) ++ [lecturio] Add support for lecturio.de (#18562) ++ [discovery] Add support for Scripps Networks watch domains (#17947) + + +version 2018.12.17 + +Extractors +* [ard:beta] Improve geo restricted videos extraction +* [ard:beta] Fix subtitles extraction +* [ard:beta] Improve extraction robustness +* [ard:beta] Relax URL regular expression (#18441) +* [acast] Add support for embed.acast.com and play.acast.com (#18483) +* [iprima] Relax URL regular expression (#18515, #18540) +* [vrv] Fix initial state extraction (#18553) +* [youtube] Fix mark watched (#18546) ++ [safari] Add support for learning.oreilly.com (#18510) +* [youtube] Fix multifeed extraction (#18531) +* [lecturio] Improve subtitles extraction (#18488) +* [uol] Fix format URL extraction (#18480) ++ [ard:mediathek] Add support for classic.ardmediathek.de (#18473) + + +version 2018.12.09 + +Core +* [YoutubeDL] Keep session cookies in cookie file between runs +* [YoutubeDL] Recognize session cookies with expired set to 0 (#12929) + +Extractors ++ [teachable] Add support for teachable platform sites (#5451, #18150, #18272) ++ [aenetworks] Add support for historyvault.com (#18460) +* [imgur] Improve gallery and album detection and extraction (#9133, #16577, + #17223, #18404) +* [iprima] Relax URL regular expression (#18453) +* [hotstar] Fix video data extraction (#18386) +* [ard:mediathek] Fix title and description extraction (#18349, #18371) +* [xvideos] Switch to HTTPS (#18422, #18427) ++ [lecturio] Add support for lecturio.com (#18405) ++ [nrktv:series] Add support for extra materials +* [nrktv:season,series] Fix extraction (#17159, #17258) +* [nrktv] Relax URL regular expression (#18304, #18387) +* [yourporn] Fix extraction (#18424, #18425) +* [tbs] Fix info extraction (#18403) ++ [gamespot] Add support for review URLs + + +version 2018.12.03 + +Core +* [utils] Fix random_birthday to generate existing dates only (#18284) + +Extractors ++ [tiktok] Add support for tiktok.com (#18108, #18135) +* [pornhub] Use actual URL host for requests (#18359) +* [lynda] Fix authentication (#18158, #18217) +* [gfycat] Update API endpoint (#18333, #18343) ++ [hotstar] Add support for alternative app state layout (#18320) +* [azmedien] Fix extraction (#18334, #18336) ++ [vimeo] Add support for VHX (Vimeo OTT) (#14835) +* [joj] Fix extraction (#18280, #18281) ++ [wistia] Add support for fast.wistia.com (#18287) + + +version 2018.11.23 + +Core ++ [setup.py] Add more relevant classifiers + +Extractors +* [mixcloud] Fallback to hardcoded decryption key (#18016) +* [nbc:news] Fix article extraction (#16194) +* [foxsports] Fix extraction (#17543) +* [loc] Relax regular expression and improve formats extraction ++ [ciscolive] Add support for ciscolive.cisco.com (#17984) +* [nzz] Relax kaltura regex (#18228) +* [sixplay] Fix formats extraction +* [bitchute] Improve title extraction +* [kaltura] Limit requested MediaEntry fields ++ [americastestkitchen] Add support for zype embeds (#18225) ++ [pornhub] Add pornhub.net alias +* [nova:embed] Fix extraction (#18222) + + +version 2018.11.18 + +Extractors ++ [wwe] Extract subtitles ++ [wwe] Add support for playlists (#14781) ++ [wwe] Add support for wwe.com (#14781, #17450) +* [vk] Detect geo restriction (#17767) +* [openload] Use original
host during extraction (#18211) +* [atvat] Fix extraction (#18041) ++ [rte] Add support for new API endpoint (#18206) +* [tnaflixnetwork:embed] Fix extraction (#18205) +* [picarto] Use API and add token support (#16518) ++ [zype] Add support for player.zype.com (#18143) +* [vivo] Fix extraction (#18139) +* [ruutu] Update API endpoint (#18138) + + +version 2018.11.07 + +Extractors ++ [youtube] Add another JS signature function name regex (#18091, #18093, + #18094) +* [facebook] Fix tahoe request (#17171) +* [cliphunter] Fix extraction (#18083) ++ [youtube:playlist] Add support for invidio.us (#18077) +* [zattoo] Arrange API hosts for derived extractors (#18035) ++ [youtube] Add fallback metadata extraction from videoDetails (#18052) + + +version 2018.11.03 + +Core +* [extractor/common] Ensure response handle is not prematurely closed before + it can be read if it matches expected_status (#17195, #17846, #17447) + +Extractors +* [laola1tv:embed] Set correct stream access URL scheme (#16341) ++ [ehftv] Add support for ehftv.com (#15408) +* [azmedien] Adapt to major site redesign (#17745, #17746) ++ [twitcasting] Add support for twitcasting.tv (#17981) +* [orf:tvthek] Fix extraction (#17737, #17956, #18024) ++ [openload] Add support for oload.fun (#18045) +* [njpwworld] Fix authentication (#17427) ++ [linkedin:learning] Add support for linkedin.com/learning (#13545) +* [theplatform] Improve error detection (#13222) +* [cnbc] Simplify extraction (#14280, #17110) ++ [cnbc] Add support for new URL schema (#14193) +* [aparat] Improve extraction and extract more metadata (#17445, #18008) +* [aparat] Fix extraction + + +version 2018.10.29 + +Core ++ [extractor/common] Add validation for JSON-LD URLs + +Extractors ++ [sportbox] Add support for matchtv.ru +* [sportbox] Fix extraction (#17978) +* [screencast] Fix extraction (#14590, #14617, #17990) ++ [openload] Add support for oload.icu ++ [ivi] Add support for ivi.tv +* [crunchyroll] Improve extraction failsafeness (#17991) +* [dailymail] Fix formats extraction (#17976) +* [viewster] Reduce format requests +* [cwtv] Handle API errors (#17905) ++ [rutube] Use geo verification headers (#17897) ++ [brightcove:legacy] Add fallbacks to brightcove:new (#13912) +- [tv3] Remove extractor (#10461, #15339) +* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894) ++ [openload] Add support for oload.cc (#17823) ++ [patreon] Extract post_file URL (#17792) +* [patreon] Fix extraction (#14502, #10471) + + +version 2018.10.05 + +Extractors +* [pluralsight] Improve authentication (#17762) +* [dailymotion] Fix extraction (#17699) +* [crunchyroll] Switch to HTTPS for RpcApi (#17749) ++ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705) +* [philharmoniedeparis] Fix extraction (#17705) ++ [jamendo] Add support for licensing.jamendo.com (#17724) ++ [openload] Add support for oload.cloud (#17710) +* [pluralsight] Fix subtitles extraction (#17726, #17728) ++ [vimeo] Add another config regular expression (#17690) +* [spike] Fix Paramount Network extraction (#17677) +* [hotstar] Fix extraction (#14694, #14931, #17637) + + +version 2018.09.26 + +Extractors +* [pluralsight] Fix subtitles extraction (#17671) +* [mediaset] Improve embed support (#17668) ++ [youtube] Add support for invidio.us (#17613) ++ [zattoo] Add support for more zattoo platform sites +* [zattoo] Fix extraction (#17175, #17542) + + +version 2018.09.18 + +Core ++ [extractor/common] Introduce channel meta fields + +Extractors +* [adobepass] Don't pollute default headers
dict +* [udemy] Don't pollute default headers dict +* [twitch] Don't pollute default headers dict +* [youtube] Don't pollute default query dict (#17593) +* [crunchyroll] Prefer hardsubless formats and formats in locale language +* [vrv] Make format ids deterministic +* [vimeo] Fix ondemand playlist extraction (#14591) ++ [pornhub] Extract upload date (#17574) ++ [porntube] Extract channel meta fields ++ [vimeo] Extract channel meta fields ++ [youtube] Extract channel meta fields (#9676, #12939) +* [porntube] Fix extraction (#17541) +* [asiancrush] Fix extraction (#15630) ++ [twitch:clips] Extend URL regular expression (closes #17559) ++ [vzaar] Add support for HLS +* [tube8] Fix metadata extraction (#17520) +* [eporner] Extract JSON-LD (#17519) + + +version 2018.09.10 + +Core ++ [utils] Properly recognize AV1 codec (#17506) + +Extractors ++ [iprima] Add support for prima.iprima.cz (#17514) ++ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414) +* [nbc] Fix extraction of percent encoded URLs (#17374) + + +version 2018.09.08 + +Extractors +* [youtube] Fix extraction (#17457, #17464) ++ [pornhub:uservideos] Add support for new URLs (#17388) +* [iprima] Confirm adult check (#17437) +* [slideslive] Make check for video service name case-insensitive (#17429) +* [radiojavan] Fix extraction (#17151) +* [generic] Skip unsuccessful jwplayer extraction (#16735) + + +version 2018.09.01 + +Core +* [utils] Skip remote IP addresses non matching to source address' IP version + when creating a connection (#13422, #17362) + +Extractors ++ [ard] Add support for one.ard.de (#17397) +* [niconico] Fix extraction on python3 (#17393, #17407) +* [ard] Extract f4m formats +* [crunchyroll] Parse vilos media data (#17343) ++ [ard] Add support for Beta ARD Mediathek ++ [bandcamp] Extract more metadata (#13197) +* [internazionale] Fix extraction of non-available-abroad videos (#17386) + + +version 2018.08.28 + +Extractors ++ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix) + (#17361) +* [bitchute] Fix extraction by passing a custom User-Agent (#17360) +* [webofstories:playlist] Fix extraction (#16914) ++ [tvplayhome] Add support for new tvplay URLs (#17344) ++ [generic] Allow relative src for videojs embeds (#17324) ++ [xfileshare] Add support for vidto.se (#17317) ++ [vidzi] Add support for vidzi.nu (#17316) ++ [nova:embed] Add support for media.cms.nova.cz (#17282) + + +version 2018.08.22 + +Core +* [utils] Use pure browser header for User-Agent (#17236) + +Extractors ++ [kinopoisk] Add support for kinopoisk.ru (#17283) ++ [yourporn] Add support for yourporn.sexy (#17298) ++ [go] Add support for disneynow.go.com (#16299, #17264) ++ [6play] Add support for play.rtl.hr (#17249) +* [anvato] Fallback to generic API key for access-key-to-API-key lookup + (#16788, #17254) +* [lci] Fix extraction (#17274) +* [bbccouk] Extend id URL regular expression (#17270) +* [cwtv] Fix extraction (#17256) +* [nova] Fix extraction (#17241) ++ [generic] Add support for expressen embeds +* [raywenderlich] Adapt to site redesign (#17225) ++ [redbulltv] Add support for redbull.com tv URLs (#17218) ++ [bitchute] Add support for bitchute.com (#14052) ++ [clyp] Add support for token protected media (#17184) +* [imdb] Fix extension extraction (#17167) + + +version 2018.08.04 + +Extractors +* [funk:channel] Improve byChannelAlias extraction (#17142) +* [twitch] Fix authentication (#17024, #17126) +* [twitch:vod] Improve URL regular expression (#17135) +* [watchbox] Fix extraction (#17107) +* [pbs] Fix extraction
(#17109) +* [theplatform] Relax URL regular expression (#16181, #17097) ++ [viqeo] Add support for viqeo.tv (#17066) + + +version 2018.07.29 + +Extractors +* [crunchyroll:playlist] Restrict URL regular expression (#17069, #17076) ++ [pornhub] Add support for subtitles (#16924, #17088) +* [ceskatelevize] Use https for API call (#16997, #16999) +* [dailymotion:playlist] Fix extraction (#16894) +* [ted] Improve extraction +* [ted] Fix extraction for videos without nativeDownloads (#16756, #17085) +* [telecinco] Fix extraction (#17080) +* [mitele] Reduce number of requests +* [rai] Return non HTTP relinker URL intact (#17055) +* [vk] Fix extraction for inline only videos (#16923) +* [streamcloud] Fix extraction (#17054) +* [facebook] Fix tahoe player extraction with authentication (#16655) ++ [puhutv] Add support for puhutv.com (#12712, #16010, #16269) + + +version 2018.07.21 + +Core ++ [utils] Introduce url_or_none +* [utils] Allow JSONP without function name (#17028) ++ [extractor/common] Extract DASH and MSS formats from SMIL manifests + +Extractors ++ [bbc] Add support for BBC Radio Play pages (#17022) +* [iwara] Fix download URLs (#17026) +* [vrtnu] Relax title extraction and extract JSON-LD (#17018) ++ [viu] Pass Referer and Origin headers and area id (#16992) ++ [vimeo] Add another config regular expression (#17013) ++ [facebook] Extract view count (#16942) +* [dailymotion] Improve description extraction (#16984) +* [slutload] Fix and improve extraction (#17001) +* [mediaset] Fix extraction (#16977) ++ [theplatform] Add support for theplatform TLD customization (#16977) +* [imgur] Relax URL regular expression (#16987) +* [pornhub] Improve extraction and extract all formats (#12166, #15891, #16262, + #16959) + + +version 2018.07.10 + +Core +* [utils] Share JSON-LD regular expression +* [downloader/dash] Improve error handling (#16927) + +Extractors ++ [nrktv] Add support for new season and serie URL schema ++ [nrktv] Add support for new episode URL schema (#16909) ++ [frontendmasters] Add support for frontendmasters.com (#3661, #16328) +* [funk] Fix extraction (#16918) +* [watchbox] Fix extraction (#16904) +* [dplayit] Sort formats +* [dplayit] Fix extraction (#16901) +* [youtube] Improve login error handling (#13822) + + +version 2018.07.04 + +Core +* [extractor/common] Properly escape % in MPD templates (#16867) +* [extractor/common] Use source URL as Referer for HTML5 entries (#16849) +* Prefer ffmpeg over avconv by default (#8622) + +Extractors +* [pluralsight] Switch to graphql (#16889, #16895, #16896, #16899) +* [lynda] Simplify login and improve error capturing (#16891) ++ [go90] Add support for embed URLs (#16873) +* [go90] Detect geo restriction error and pass geo verification headers + (#16874) +* [vlive] Fix live streams extraction (#16871) +* [npo] Fix typo (#16872) ++ [mediaset] Add support for new videos and extract all formats (#16568) +* [dctptv] Restore extraction based on REST API (#16850) +* [svt] Improve extraction and add support for pages (#16802) +* [porncom] Fix extraction (#16808) + + +version 2018.06.25 + +Extractors +* [joj] Relax URL regular expression (#16771) +* [brightcove] Workaround sonyliv DRM protected videos (#16807) +* [motherless] Fix extraction (#16786) +* [itv] Make SOAP request non fatal and extract metadata from webpage (#16780) +- [foxnews:insider] Remove extractor (#15810) ++ [foxnews] Add support for iframe embeds (#15810, #16711) + + +version 2018.06.19 + +Core ++ [extractor/common] Introduce expected_status in _download_* methods + for
convenient accept of HTTP requests failed with non 2xx status codes ++ [compat] Introduce compat_integer_types + +Extractors +* [peertube] Improve generic support (#16733) ++ [6play] Use geo verification headers +* [rtbf] Fix extraction for python 3.2 +* [vgtv] Improve HLS formats extraction ++ [vgtv] Add support for www.aftonbladet.se/tv URLs +* [bbccouk] Use expected_status +* [markiza] Expect 500 HTTP status code +* [tvnow] Try all clear manifest URLs (#15361) + + +version 2018.06.18 + +Core +* [downloader/rtmp] Fix downloading in verbose mode (#16736) + +Extractors ++ [markiza] Add support for markiza.sk (#16750) +* [wat] Try all supported adaptive URLs ++ [6play] Add support for rtlplay.be and extract hd usp formats ++ [rtbf] Add support for audio and live streams (#9638, #11923) ++ [rtbf] Extract HLS, DASH and all HTTP formats ++ [rtbf] Extract subtitles ++ [rtbf] Fixup specific HTTP URLs (#16101) ++ [expressen] Add support for expressen.se +* [vidzi] Fix extraction (#16678) +* [pbs] Improve extraction (#16623, #16684) +* [bilibili] Restrict cid regular expression (#16638, #16734) + + +version 2018.06.14 + +Core +* [downloader/http] Fix retry on error when streaming to stdout (#16699) + +Extractors ++ [discoverynetworks] Add support for disco-api videos (#16724) ++ [dailymotion] Add support for password protected videos (#9789) ++ [abc:iview] Add support for livestreams (#12354) +* [abc:iview] Fix extraction (#16704) ++ [crackle] Add support for sonycrackle.com (#16698) ++ [tvnet] Add support for tvnet.gov.vn (#15462) +* [nrk] Update API hosts and try all previously known ones (#16690) +* [wimp] Fix Youtube embeds extraction + + +version 2018.06.11 + +Extractors +* [npo] Extend URL regular expression and add support for npostart.nl (#16682) ++ [inc] Add support for another embed schema (#16666) +* [tv4] Fix format extraction (#16650) ++ [nexx] Add support for free cdn (#16538) ++ [pbs] Add another cove id pattern (#15373) ++ [rbmaradio] Add support for 192k format (#16631) + + +version 2018.06.04 + +Extractors ++ [camtube] Add support for camtube.co ++ [twitter:card] Extract guest token (#16609) ++ [chaturbate] Use geo verification headers ++ [bbc] Add support for bbcthree (#16612) +* [youtube] Move metadata extraction after video availability check ++ [youtube] Extract track and artist ++ [safari] Add support for new URL schema (#16614) +* [adn] Fix extraction + + +version 2018.06.02 + +Core +* [utils] Improve determine_ext + +Extractors ++ [facebook] Add support for tahoe player videos (#15441, #16554) +* [cbc] Improve extraction (#16583, #16593) +* [openload] Improve ext extraction (#16595) ++ [twitter:card] Add support for another endpoint (#16586) ++ [openload] Add support for oload.win and oload.download (#16592) +* [audimedia] Fix extraction (#15309) ++ [francetv] Add support for sport.francetvinfo.fr (#15645) +* [mlb] Improve extraction (#16587) +- [nhl] Remove old extractors +* [rbmaradio] Check formats availability (#16585) + + +version 2018.05.30 + +Core +* [downloader/rtmp] Generalize download messages and report time elapsed + on finish +* [downloader/rtmp] Gracefully handle live streams interrupted by user + +Extractors +* [teamcoco] Fix extraction for full episodes (#16573) +* [spiegel] Fix info extraction (#16538) ++ [apa] Add support for apa.at (#15041, #15672) ++ [bellmedia] Add support for bnnbloomberg.ca (#16560) ++ [9c9media] Extract MPD formats and subtitles +* [cammodels] Use geo verification headers ++ [ufctv] Add support for authentication (#16542) ++ 
[cammodels] Add support for cammodels.com (#14499) +* [utils] Fix style id extraction for namespaced id attribute in dfxp2srt + (#16551) +* [soundcloud] Detect format extension (#16549) +* [cbc] Fix playlist title extraction (#16502) ++ [tumblr] Detect and report sensitive media (#13829) ++ [tumblr] Add support for authentication (#15133) + + +version 2018.05.26 + +Core +* [utils] Improve parse_age_limit + +Extractors +* [audiomack] Stringify video id (#15310) +* [izlesene] Fix extraction (#16233, #16271, #16407) ++ [indavideo] Add support for generic embeds (#11989) +* [indavideo] Fix extraction (#11221) +* [indavideo] Sign download URLs (#16174) ++ [peertube] Add support for PeerTube based sites (#16301, #16329) +* [imgur] Fix extraction (#16537) ++ [hidive] Add support for authentication (#16534) ++ [nbc] Add support for stream.nbcsports.com (#13911) ++ [viewlift] Add support for hoichoi.tv (#16536) +* [go90] Extract age limit and detect DRM protection (#10127) +* [viewlift] Fix extraction for snagfilms.com (#15766) +* [globo] Improve extraction (#4189) + * Add support for authentication + * Simplify URL signing + * Extract DASH and MSS formats +* [leeco] Fix extraction (#16464) +* [teamcoco] Add fallback for format extraction (#16484) +* [teamcoco] Improve URL regular expression (#16484) +* [imdb] Improve extraction (#4085, #14557) + + +version 2018.05.18 + +Extractors +* [vimeo:likes] Relax URL regular expression and fix single page likes + extraction (#16475) +* [pluralsight] Fix clip id extraction (#16460) ++ [mychannels] Add support for mychannels.com (#15334) +- [moniker] Remove extractor (#15336) +* [pbs] Fix embed data extraction (#16474) ++ [mtv] Add support for paramountnetwork.com and bellator.com (#15418) +* [youtube] Fix hd720 format position +* [dailymotion] Remove fragment part from m3u8 URLs (#8915) +* [3sat] Improve extraction (#15350) + * Extract all formats + * Extract more format metadata + * Improve format sorting + * Use hls native downloader + * Detect and bypass geo-restriction ++ [dtube] Add support for d.tube (#15201) +* [options] Fix typo (#16450) +* [youtube] Improve format filesize extraction (#16453) +* [youtube] Make uploader extraction non fatal (#16444) +* [youtube] Fix extraction for embed restricted live streams (#16433) +* [nbc] Improve info extraction (#16440) +* [twitch:clips] Fix extraction (#16429) +* [redditr] Relax URL regular expression (#16426, #16427) +* [mixcloud] Bypass throttling for HTTP formats (#12579, #16424) ++ [nick] Add support for nickjr.de (#13230) +* [teamcoco] Fix extraction (#16374) + + +version 2018.05.09 + +Core +* [YoutubeDL] Ensure ext exists for automatic captions +* Introduce --geo-bypass-ip-block + +Extractors ++ [udemy] Extract asset captions ++ [udemy] Extract stream URLs (#16372) ++ [businessinsider] Add support for businessinsider.com (#16387, #16388, #16389) ++ [cloudflarestream] Add support for cloudflarestream.com (#16375) +* [watchbox] Fix extraction (#16356) +* [discovery] Extract Affiliate/Anonymous Auth Token from cookies (#14954) ++ [itv:btcc] Add support for itv.com/btcc (#16139) +* [tunein] Use live title for live streams (#16347) +* [itv] Improve extraction (#16253) + + +version 2018.05.01 + +Core +* [downloader/fragment] Restart download if .ytdl file is corrupt (#16312) ++ [extractor/common] Extract interaction statistic ++ [utils] Add merge_dicts ++ [extractor/common] Add _download_json_handle + +Extractors +* [kaltura] Improve iframe embeds detection (#16337) ++ [udemy] Extract outputs renditions
(#16289, #16291, #16320, #16321, #16334, + #16335) ++ [zattoo] Add support for zattoo.com and mobiltv.quickline.com (#14668, #14676) +* [yandexmusic] Convert release_year to int +* [udemy] Override _download_webpage_handle instead of _download_webpage +* [xiami] Override _download_webpage_handle instead of _download_webpage +* [yandexmusic] Override _download_webpage_handle instead of _download_webpage +* [youtube] Correctly disable polymer on all requests (#16323, #16326) +* [generic] Prefer enclosures over links in RSS feeds (#16189) ++ [redditr] Add support for old.reddit.com URLs (#16274) +* [nrktv] Update API host (#16324) ++ [imdb] Extract all formats (#16249) ++ [vimeo] Extract JSON-LD (#16295) +* [funk:channel] Improve extraction (#16285) + + +version 2018.04.25 + +Core +* [utils] Fix match_str for boolean meta fields ++ [Makefile] Add support for pandoc 2 and disable smart extension (#16251) +* [YoutubeDL] Fix typo in media extension compatibility checker (#16215) + +Extractors ++ [openload] Recognize IPv6 stream URLs (#16136, #16137, #16205, #16246, + #16250) ++ [twitch] Extract is_live according to status (#16259) +* [pornflip] Relax URL regular expression (#16258) +- [etonline] Remove extractor (#16256) +* [breakcom] Fix extraction (#16254) ++ [youtube] Add ability to authenticate with cookies +* [youtube:feed] Implement lazy playlist extraction (#10184) ++ [svt] Add support for TV channel live streams (#15279, #15809) +* [ccma] Fix video extraction (#15931) +* [rentv] Fix extraction (#15227) ++ [nick] Add support for nickjr.nl (#16230) +* [extremetube] Fix metadata extraction ++ [keezmovies] Add support for generic embeds (#16134, #16154) +* [nexx] Extract new azure URLs (#16223) +* [cbssports] Fix extraction (#16217) +* [kaltura] Improve embeds detection (#16201) +* [instagram:user] Fix extraction (#16119) +* [cbs] Skip DRM asset types (#16104) + + +version 2018.04.16 + +Extractors +* [smotri:broadcast] Fix extraction (#16180) ++ [picarto] Add support for picarto.tv (#6205, #12514, #15276, #15551) +* [vine:user] Fix extraction (#15514, #16190) +* [pornhub] Relax URL regular expression (#16165) +* [cbc:watch] Re-acquire device token when expired (#16160) ++ [fxnetworks] Add support for https theplatform URLs (#16125, #16157) ++ [instagram:user] Add request signing (#16119) ++ [twitch] Add support for mobile URLs (#16146) + + +version 2018.04.09 + +Core +* [YoutubeDL] Do not save/restore console title while simulate (#16103) +* [extractor/common] Relax JSON-LD context check (#16006) + +Extractors ++ [generic] Add support for tube8 embeds ++ [generic] Add support for share-videos.se embeds (#16089, #16115) +* [odnoklassniki] Extend URL regular expression (#16081) +* [steam] Bypass mature content check (#16113) ++ [acast] Extract more metadata +* [acast] Fix extraction (#16118) +* [instagram:user] Fix extraction (#16119) +* [drtuber] Fix title extraction (#16107, #16108) +* [liveleak] Extend URL regular expression (#16117) ++ [openload] Add support for oload.xyz +* [openload] Relax stream URL regular expression +* [openload] Fix extraction (#16099) ++ [svtplay:series] Add support for season URLs ++ [svtplay:series] Add support for series (#11130, #16059) + + +version 2018.04.03 + +Extractors ++ [tvnow] Add support for shows (#15837) +* [dramafever] Fix authentication (#16067) +* [afreecatv] Use partial view only when necessary (#14450) ++ [afreecatv] Add support for authentication (#14450) ++ [nationalgeographic] Add support for new URL schema (#16001, #16054) +* [xvideos] Fix 
thumbnail extraction (#15978, #15979) +* [medialaan] Fix vod id (#16038) ++ [openload] Add support for oload.site (#16039) +* [naver] Fix extraction (#16029) +* [dramafever] Partially switch to API v5 (#16026) +* [abc:iview] Unescape title and series meta fields (#15994) +* [videa] Extend URL regular expression (#16003) + + +version 2018.03.26.1 + +Core ++ [downloader/external] Add elapsed time to progress hook (#10876) +* [downloader/external,fragment] Fix download finalization when writing file + to stdout (#10809, #10876, #15799) + +Extractors +* [vrv] Fix extraction on python2 (#15928) +* [afreecatv] Update referrer (#15947) ++ [24video] Add support for 24video.sexy (#15973) +* [crackle] Bypass geo restriction +* [crackle] Fix extraction (#15969) ++ [lenta] Add support for lenta.ru (#15953) ++ [instagram:user] Add pagination (#15934) +* [youku] Update ccode (#15939) +* [libsyn] Adapt to new page structure + + +version 2018.03.20 + +Core +* [extractor/common] Improve thumbnail extraction for HTML5 entries +* Generalize XML manifest processing code and improve XSPF parsing ++ [extractor/common] Add _download_xml_handle ++ [extractor/common] Add support for relative URIs in _parse_xspf (#15794) + +Extractors ++ [7plus] Extract series metadata (#15862, #15906) +* [9now] Bypass geo restriction (#15920) +* [cbs] Skip unavailable assets (#13490, #13506, #15776) ++ [canalc2] Add support for HTML5 videos (#15916, #15919) ++ [ceskatelevize] Add support for iframe embeds (#15918) ++ [prosiebensat1] Add support for galileo.tv (#15894) ++ [generic] Add support for xfileshare embeds (#15879) +* [bilibili] Switch to v2 playurl API +* [bilibili] Fix and improve extraction (#15048, #15430, #15622, #15863) +* [heise] Improve extraction (#15496, #15784, #15026) +* [instagram] Fix user videos extraction (#15858) + + +version 2018.03.14 + +Extractors +* [soundcloud] Update client id (#15866) ++ [tennistv] Add support for tennistv.com ++ [line] Add support for tv.line.me (#9427) +* [xnxx] Fix extraction (#15817) +* [njpwworld] Fix authentication (#15815) + + +version 2018.03.10 + +Core +* [downloader/hls] Skip uplynk ad fragments (#15748) + +Extractors +* [pornhub] Don't override session cookies (#15697) ++ [raywenderlich] Add support for videos.raywenderlich.com (#15251) +* [funk] Fix extraction and rework extractors (#15792) +* [nexx] Restore reverse engineered approach ++ [heise] Add support for kaltura embeds (#14961, #15728) ++ [tvnow] Extract series metadata (#15774) +* [ruutu] Continue formats extraction on NOT-USED URLs (#15775) +* [vrtnu] Use redirect URL for building video JSON URL (#15767, #15769) +* [vimeo] Modernize login code and improve error messaging +* [archiveorg] Fix extraction (#15770, #15772) ++ [hidive] Add support for hidive.com (#15494) +* [afreecatv] Detect deleted videos +* [afreecatv] Fix extraction (#15755) +* [vice] Fix extraction and rework extractors (#11101, #13019, #13622, #13778) ++ [vidzi] Add support for vidzi.si (#15751) +* [npo] Fix typo + + +version 2018.03.03 + +Core ++ [utils] Add parse_resolution +Revert respect --prefer-insecure while updating + +Extractors ++ [yapfiles] Add support for yapfiles.ru (#15726, #11085) +* [spankbang] Fix formats extraction (#15727) +* [adn] Fix extraction (#15716) ++ [toggle] Extract DASH and ISM formats (#15721) ++ [nickelodeon] Add support for nickelodeon.com.tr (#15706) +* [npo] Validate and filter format URLs (#15709) + + +version 2018.02.26 + +Extractors +* [udemy] Use custom User-Agent (#15571) + + +version 2018.02.25 + +Core +* 
[postprocessor/embedthumbnail] Skip embedding when there aren't any + thumbnails (#12573) +* [extractor/common] Improve jwplayer subtitles extraction (#15695) + +Extractors ++ [vidlii] Add support for vidlii.com (#14472, #14512, #14779) ++ [streamango] Capture and output error messages +* [streamango] Fix extraction (#14160, #14256) ++ [telequebec] Add support for emissions (#14649, #14655) ++ [telequebec:live] Add support for live streams (#15688) ++ [mailru:music] Add support for mail.ru/music (#15618) +* [aenetworks] Switch to akamai HLS formats (#15612) +* [ytsearch] Fix flat title extraction (#11260, #15681) + + +version 2018.02.22 + +Core ++ [utils] Fixup some common URL typos in sanitize_url (#15649) +* Respect --prefer-insecure while updating (#15497) + +Extractors +* [vidio] Fix HLS URL extraction (#15675) ++ [nexx] Add support for arc.nexx.cloud URLs +* [nexx] Switch to arc API (#15652) +* [redtube] Fix duration extraction (#15659) ++ [sonyliv] Respect referrer (#15648) ++ [brightcove:new] Use referrer for formats' HTTP headers ++ [cbc] Add support for olympics.cbc.ca (#15535) ++ [fusion] Add support for fusion.tv (#15628) +* [npo] Improve quality metadata extraction +* [npo] Relax URL regular expression (#14987, #14994) ++ [npo] Capture and output error message ++ [pornhub] Add support for channels (#15613) +* [youtube] Handle shared URLs with generic extractor (#14303) + + +version 2018.02.11 + +Core ++ [YoutubeDL] Add support for filesize_approx in format selector (#15550) + +Extractors ++ [francetv] Add support for live streams (#13689) ++ [francetv] Add support for zouzous.fr and ludo.fr (#10454, #13087, #13103, + #15012) +* [francetv] Separate main extractor and rework others to delegate to it +* [francetv] Improve manifest URL signing (#15536) ++ [francetv] Sign m3u8 manifest URLs (#15565) ++ [veoh] Add support for embed URLs (#15561) +* [afreecatv] Fix extraction (#15556) +* [periscope] Use accessVideoPublic endpoint (#15554) +* [discovery] Fix auth request (#15542) ++ [6play] Extract subtitles (#15541) +* [newgrounds] Fix metadata extraction (#15531) ++ [nbc] Add support for stream.nbcolympics.com (#10295) +* [dvtv] Fix live streams extraction (#15442) + + +version 2018.02.08 + +Extractors ++ [myvi] Extend URL regular expression ++ [myvi:embed] Add support for myvi.tv embeds (#15521) ++ [prosiebensat1] Extend URL regular expression (#15520) +* [pokemon] Relax URL regular expression and extend title extraction (#15518) ++ [gameinformer] Use geo verification headers +* [la7] Fix extraction (#15501, #15502) +* [gameinformer] Fix brightcove id extraction (#15416) ++ [afreecatv] Pass referrer to video info request (#15507) ++ [telebruxelles] Add support for live streams +* [telebruxelles] Relax URL regular expression +* [telebruxelles] Fix extraction (#15504) +* [extractor/common] Respect secure schemes in _extract_wowza_formats + + +version 2018.02.04 + +Core +* [downloader/http] Randomize HTTP chunk size ++ [downloader/http] Add ability to pass downloader options via info dict +* [downloader/http] Fix 302 infinite loops by not reusing requests ++ Document http_chunk_size + +Extractors ++ [brightcove] Pass embed page URL as referrer (#15486) ++ [youtube] Enforce using chunked HTTP downloading for DASH formats + + +version 2018.02.03 + +Core ++ Introduce --http-chunk-size for chunk-based HTTP downloading ++ Add support for IronPython +* [downloader/ism] Fix Python 3.2 support + +Extractors +* [redbulltv] Fix extraction (#15481) +* [redtube] Fix metadata extraction (#15472) +* 
[pladform] Respect platform id and extract HLS formats (#15468) +- [rtlnl] Remove progressive formats (#15459) +* [6play] Do not modify asset URLs with a token (#15248) +* [nationalgeographic] Relax URL regular expression +* [dplay] Relax URL regular expression (#15458) +* [cbsinteractive] Fix data extraction (#15451) ++ [amcnetworks] Add support for sundancetv.com (#9260) + + +version 2018.01.27 + +Core +* [extractor/common] Improve _json_ld for articles +* Switch codebase to use compat_b64decode ++ [compat] Add compat_b64decode + +Extractors ++ [seznamzpravy] Add support for seznam.cz and seznamzpravy.cz (#14102, #14616) +* [dplay] Bypass geo restriction ++ [dplay] Add support for disco-api videos (#15396) +* [youtube] Extract precise error messages (#15284) +* [teachertube] Capture and output error message +* [teachertube] Fix and relax thumbnail extraction (#15403) ++ [prosiebensat1] Add another clip id regular expression (#15378) +* [tbs] Update tokenizer url (#15395) +* [mixcloud] Use compat_b64decode (#15394) +- [thesixtyone] Remove extractor (#15341) + + +version 2018.01.21 + +Core +* [extractor/common] Improve jwplayer DASH formats extraction (#9242, #15187) +* [utils] Improve scientific notation handling in js_to_json (#14789) + +Extractors ++ [southparkdk] Add support for southparkstudios.nu ++ [southpark] Add support for collections (#14803) +* [franceinter] Fix upload date extraction (#14996) ++ [rtvs] Add support for rtvs.sk (#9242, #15187) +* [restudy] Fix extraction and extend URL regular expression (#15347) +* [youtube:live] Improve live detection (#15365) ++ [springboardplatform] Add support for springboardplatform.com +* [prosiebensat1] Add another clip id regular expression (#15290) +- [ringtv] Remove extractor (#15345) + + +version 2018.01.18 + +Extractors +* [soundcloud] Update client id (#15306) +- [kamcord] Remove extractor (#15322) ++ [spiegel] Add support for nexx videos (#15285) +* [twitch] Fix authentication and error capture (#14090, #15264) +* [vk] Detect more errors due to copyright complaints (#15259) + + +version 2018.01.14 + +Extractors +* [youtube] Fix live streams extraction (#15202) +* [wdr] Bypass geo restriction +* [wdr] Rework extractors (#14598) ++ [wdr] Add support for wdrmaus.de/elefantenseite (#14598) ++ [gamestar] Add support for gamepro.de (#3384) +* [viafree] Skip rtmp formats (#15232) ++ [pandoratv] Add support for mobile URLs (#12441) ++ [pandoratv] Add support for new URL format (#15131) ++ [ximalaya] Add support for ximalaya.com (#14687) ++ [digg] Add support for digg.com (#15214) +* [limelight] Tolerate empty pc formats (#15150, #15151, #15207) +* [ndr:embed:base] Make separate formats extraction non fatal (#15203) ++ [weibo] Add extractor (#15079) ++ [ok] Add support for live streams +* [canalplus] Fix extraction (#15072) +* [bilibili] Fix extraction (#15188) + + +version 2018.01.07 + +Core +* [utils] Fix youtube-dl under PyPy3 on Windows +* [YoutubeDL] Output python implementation in debug header + +Extractors ++ [jwplatform] Add support for multiple embeds (#15192) +* [mitele] Fix extraction (#15186) ++ [motherless] Add support for groups (#15124) +* [lynda] Relax URL regular expression (#15185) +* [soundcloud] Fallback to avatar picture for thumbnail (#12878) +* [youku] Fix list extraction (#15135) +* [openload] Fix extraction (#15166) +* [lynda] Skip invalid subtitles (#15159) +* [twitch] Pass video id to url_result when extracting playlist (#15139) +* [rtve.es:alacarta] Fix extraction of some new URLs +* [acast] Fix extraction
(#15147) + + +version 2017.12.31 + +Core ++ [extractor/common] Add container meta field for formats extracted + in _parse_mpd_formats (#13616) ++ [downloader/hls] Use HTTP headers for key request +* [common] Use AACL as the default fourcc when AudioTag is 255 +* [extractor/common] Fix extraction of DASH formats with the same + representation id (#15111) + +Extractors ++ [slutload] Add support for mobile URLs (#14806) +* [abc:iview] Bypass geo restriction +* [abc:iview] Fix extraction (#14711, #14782, #14838, #14917, #14963, #14985, + #15035, #15057, #15061, #15071, #15095, #15106) +* [openload] Fix extraction (#15118) +- [sandia] Remove extractor +- [collegerama] Remove extractor ++ [mediasite] Add support for sites based on Mediasite Video Platform (#5428, + #11185, #14343) ++ [ufctv] Add support for ufc.tv (#14520) +* [pluralsight] Fix missing first line of subtitles (#11118) +* [openload] Fallback on f-page extraction (#14665, #14879) +* [vimeo] Improve password protected videos extraction (#15114) +* [aws] Fix canonical/signed headers generation on Python 2 (#15102) + + +version 2017.12.28 + +Extractors ++ [internazionale] Add support for internazionale.it (#14973) +* [playtvak] Relax video regular expression and make description optional + (#15037) ++ [filmweb] Add support for filmweb.no (#8773, #10368) ++ [23video] Add support for 23video.com ++ [espn] Add support for fivethirtyeight.com (#6864) ++ [umg:de] Add support for universal-music.de (#11582, #11584) ++ [espn] Add support for espnfc and extract more formats (#8053) +* [youku] Update ccode (#14880) ++ [openload] Add support for oload.stream (#15070) +* [youku] Fix list extraction (#15065) + + +version 2017.12.23 + +Core +* [extractor/common] Move X-Forwarded-For setup code into _request_webpage ++ [YoutubeDL] Add support for playlist_uploader and playlist_uploader_id in + output template (#11427, #15018) ++ [extractor/common] Introduce uploader, uploader_id and uploader_url + meta fields for playlists (#11427, #15018) +* [downloader/fragment] Encode filename of fragment being removed (#15020) ++ [utils] Add another date format pattern (#14999) + +Extractors ++ [kaltura] Add another embed pattern for entry_id ++ [7plus] Add support for 7plus.com.au (#15043) +* [animeondemand] Relax login error regular expression ++ [shahid] Add support for show pages (#7401) ++ [youtube] Extract uploader, uploader_id and uploader_url for playlists + (#11427, #15018) +* [afreecatv] Improve format extraction (#15019) ++ [cspan] Add support for audio only pages and catch page errors (#14995) ++ [mailru] Add support for embed URLs (#14904) +* [crunchyroll] Future-proof XML element checks (#15013) +* [cbslocal] Fix timestamp extraction (#14999, #15000) +* [discoverygo] Correct TTML subtitle extension +* [vk] Make view count optional (#14979) +* [disney] Skip Apple FairPlay formats (#14982) +* [voot] Fix format extraction (#14758) + + +version 2017.12.14 + +Core +* [postprocessor/xattr] Clarify NO_SPACE message (#14970) +* [downloader/http] Return actual download result from real_download (#14971) + +Extractors ++ [itv] Extract more subtitles and duration +* [itv] Improve extraction (#14944) ++ [byutv] Add support for geo restricted videos +* [byutv] Fix extraction (#14966, #14967) ++ [bbccouk] Fix extraction for 320k HLS streams ++ [toutv] Add support for special video URLs (#14179) +* [discovery] Fix free videos extraction (#14157, #14954) +* [tvnow] Fix extraction (#7831) ++ [nickelodeon:br] Add support for nickelodeon Brazil websites (#14893) +* 
[nick] Improve extraction (#14876) +* [tbs] Fix extraction (#13658) + + +version 2017.12.10 + +Core ++ [utils] Add sami mimetype to mimetype2ext + +Extractors +* [culturebox] Improve video id extraction (#14947) +* [twitter] Improve extraction (#14197) ++ [udemy] Extract more HLS formats +* [udemy] Improve course id extraction (#14938) ++ [stretchinternet] Add support for portal.stretchinternet.com (#14576) +* [ellentube] Fix extraction (#14407, #14570) ++ [raiplay:playlist] Add support for playlists (#14563) +* [sonyliv] Bypass geo restriction +* [sonyliv] Extract higher quality formats (#14922) +* [fox] Extract subtitles ++ [fox] Add support for Adobe Pass authentication (#14205, #14489) +- [dailymotion:cloud] Remove extractor (#6794) +* [xhamster] Fix thumbnail extraction (#14780) ++ [xhamster] Add support for mobile URLs (#14780) +* [generic] Don't pass video id as mpd id while extracting DASH (#14902) +* [ard] Skip invalid stream URLs (#14906) +* [porncom] Fix metadata extraction (#14911) +* [pluralsight] Detect agreement request (#14913) +* [toutv] Fix login (#14614) + + +version 2017.12.02 + +Core ++ [downloader/fragment] Commit part file after each fragment ++ [extractor/common] Add durations for DASH fragments with bare SegmentURLs ++ [extractor/common] Add support for DASH manifests with SegmentLists with + bare SegmentURLs (#14844) ++ [utils] Add hvc1 codec code to parse_codecs + +Extractors +* [xhamster] Fix extraction (#14884) +* [youku] Update ccode (#14872) +* [mnet] Fix format extraction (#14883) ++ [xiami] Add Referer header to API request +* [mtv] Correct scc extension in extracted subtitles (#13730) +* [vvvvid] Fix extraction for kenc videos (#13406) ++ [br] Add support for BR Mediathek videos (#14560, #14788) ++ [daisuki] Add support for motto.daisuki.com (#14681) +* [odnoklassniki] Fix API metadata request (#14862) +* [itv] Fix HLS formats extraction ++ [pbs] Add another media id regular expression + + +version 2017.11.26 + +Core +* [extractor/common] Use final URL when dumping request (#14769) + +Extractors +* [fczenit] Fix extraction +- [firstpost] Remove extractor +* [freespeech] Fix extraction +* [nexx] Extract more formats ++ [openload] Add support for openload.link (#14763) +* [empflix] Relax URL regular expression +* [empflix] Fix extraction +* [tnaflix] Don't modify download URLs (#14811) +- [gamersyde] Remove extractor +* [francetv:generationwhat] Fix extraction ++ [massengeschmacktv] Add support for Massengeschmack TV +* [fox9] Fix extraction +* [faz] Fix extraction and add support for Perform Group embeds (#14714) ++ [performgroup] Add support for performgroup.com ++ [jwplatform] Add support for iframes (#14828) +* [culturebox] Fix extraction (#14827) +* [youku] Fix extraction; update ccode (#14815) +* [livestream] Make SMIL extraction non fatal (#14792) ++ [drtuber] Add support for mobile URLs (#14772) ++ [spankbang] Add support for mobile URLs (#14771) +* [instagram] Fix description, timestamp and counters extraction (#14755) + + +version 2017.11.15 + +Core +* [common] Skip Apple FairPlay m3u8 manifests (#14741) +* [YoutubeDL] Fix playlist range optimization for --playlist-items (#14740) + +Extractors +* [vshare] Capture and output error message +* [vshare] Fix extraction (#14473) +* [crunchyroll] Extract old RTMP formats +* [tva] Fix extraction (#14736) +* [gamespot] Lower preference of HTTP formats (#14652) +* [instagram:user] Fix extraction (#14699) +* [ccma] Fix typo (#14730) +- Remove sensitive data from logging in messages +* [instagram:user] Fix 
extraction (#14699) ++ [gamespot] Add support for article URLs (#14652) +* [gamespot] Skip Brightcove Once HTTP formats (#14652) +* [cartoonnetwork] Update tokenizer_src (#14666) ++ [wsj] Recognize another URL pattern (#14704) +* [pandatv] Update API URL and sign format URLs (#14693) +* [crunchyroll] Use old login method (#11572) + + +version 2017.11.06 + +Core ++ [extractor/common] Add protocol for f4m formats +* [f4m] Prefer baseURL for relative URLs (#14660) +* [extractor/common] Respect URL query in _extract_wowza_formats (#14645) + +Extractors ++ [hotstar:playlist] Add support for playlists (#12465) +* [hotstar] Bypass geo restriction (#14672) +- [22tracks] Remove extractor (#11024, #14628) ++ [skysport] Add support for ooyala videos protected with embed_token (#14641) +* [gamespot] Extract formats referenced with new data fields (#14652) +* [spankbang] Detect unavailable videos (#14644) + + +version 2017.10.29 + +Core +* [extractor/common] Prefix format id for audio only HLS formats ++ [utils] Add support for zero years and months in parse_duration (example below) + +Extractors +* [egghead] Fix extraction (#14388) ++ [fxnetworks] Extract series metadata (#14603) ++ [younow] Add support for younow.com (#9255, #9432, #12436) +* [dctptv] Fix extraction (#14599) +* [youtube] Restrict embed regular expression (#14600) +* [vimeo] Restrict iframe embed regular expression (#14600) +* [soundgasm] Improve extraction (#14588) +- [myvideo] Remove extractor (#8557) ++ [nbc] Add support for classic-tv videos (#14575) ++ [vrtnu] Add support for cookies authentication and simplify (#11873) ++ [canvas] Add support for vrt.be/vrtnu (#11873) +* [twitch:clips] Fix title extraction (#14566) ++ [ndtv] Add support for sub-sites (#14534) +* [dramafever] Fix login error message extraction ++ [nick] Add support for more nickelodeon sites (no, dk, se, ch, fr, es, pt, + ro, hu) (#14553) + + +version 2017.10.20 + +Core +* [downloader/fragment] Report warning instead of error on inconsistent + download state +* [downloader/hls] Fix total fragments count when ad fragments exist + +Extractors +* [parliamentliveuk] Fix extraction (#14524) +* [soundcloud] Update client id (#14546) ++ [servus] Add support for servus.com (#14362) ++ [unity] Add support for unity3d.com (#14528) +* [youtube] Replace youtube redirect URLs in description (#14517) +* [pbs] Restrict direct video URL regular expression (#14519) +* [drtv] Respect preference for direct HTTP formats (#14509) ++ [eporner] Add support for embed URLs (#14507) +* [arte] Capture and output error message +* [niconico] Improve uploader metadata extraction robustness (#14135) + + +version 2017.10.15.1 + +Core +* [downloader/hls] Ignore anvato ad fragments (#14496) +* [downloader/fragment] Output ad fragment count + +Extractors +* [scrippsnetworks:watch] Bypass geo restriction ++ [anvato] Add ability to bypass geo restriction +* [redditr] Fix extraction for URLs with query (#14495) + + +version 2017.10.15 + +Core ++ [common] Add support for jwplayer youtube embeds + +Extractors +* [scrippsnetworks:watch] Fix extraction (#14389) +* [anvato] Process master m3u8 manifests +* [youtube] Fix relative URLs in description +* [spike] Bypass geo restriction ++ [howstuffworks] Add support for more domains +* [infoq] Fix HTTP format downloading ++ [rtlnl] Add support for another type of embeds ++ [onionstudios] Add support for bulbs-video embeds +* [udn] Fix extraction +* [shahid] Fix extraction (#14448) +* [kaltura] Ignore Widevine encrypted video (.wvm) (#14471) +* [vh1] Fix extraction (#9613) + + 
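The parse_duration entry under 2017.10.29 above extends the ISO 8601 handling so that duration strings carrying zero year and month components no longer fail to parse. A small illustration (return values assume the post-change behaviour):

    from youtube_dl.utils import parse_duration

    parse_duration('PT1H10M')        # 4200.0 - plain time components
    parse_duration('P0Y0M10DT3H2M')  # 874920.0 - zero years/months now accepted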
+version 2017.10.12 + +Core +* [YoutubeDL] Improve _default_format_spec (#14461) + +Extractors +* [steam] Fix extraction (#14067) ++ [funk] Add support for funk.net (#14464) ++ [nexx] Add support for shortcuts and relax domain id extraction ++ [voxmedia] Add support for recode.net (#14173) ++ [once] Add support for vmap URLs ++ [generic] Add support for channel9 embeds (#14469) +* [tva] Fix extraction (#14328) ++ [tubitv] Add support for new URL format (#14460) +- [afreecatv:global] Remove extractor +- [youtube:shared] Remove extractor (#14420) ++ [slideslive] Add support for slideslive.com (#2680) ++ [facebook] Support thumbnails (#14416) +* [vvvvid] Fix episode number extraction (#14456) +* [hrti:playlist] Relax URL regular expression +* [wdr] Relax media link regular expression (#14447) +* [hrti] Relax URL regular expression (#14443) +* [fox] Delegate extraction to uplynk:preplay (#14147) ++ [youtube] Add support for hooktube.com (#14437) + + +version 2017.10.07 + +Core +* [YoutubeDL] Ignore duplicates in --playlist-items +* [YoutubeDL] Fix out of range --playlist-items for iterable playlists and + reduce code duplication (#14425) ++ [utils] Use cache in OnDemandPagedList by default +* [postprocessor/ffmpeg] Convert to opus using libopus (#14381) + +Extractors +* [reddit] Sort formats (#14430) +* [lnkgo] Relax URL regular expression (#14423) +* [pornflip] Extend URL regular expression (#14405, #14406) ++ [xtube] Add support for embed URLs (#14417) ++ [xvideos] Add support for embed URLs and improve extraction (#14409) +* [beeg] Fix extraction (#14403) +* [tvn24] Relax URL regular expression (#14395) +* [nbc] Fix extraction (#13651, #13715, #14137, #14198, #14312, #14314, #14378, + #14392, #14414, #14419, #14431) ++ [ketnet] Add support for videos without direct sources (#14377) +* [canvas] Generalize mediazone.vrt.be extractor and rework canvas and een ++ [afreecatv] Add support for adult videos (#14376) + + +version 2017.10.01 + +Core +* [YoutubeDL] Document youtube_include_dash_manifest + +Extractors ++ [tvp] Add support for new URL schema (#14368) ++ [generic] Add support for single format Video.js embeds (#14371) +* [yahoo] Bypass geo restriction for brightcove (#14210) +* [yahoo] Use extracted brightcove account id (#14210) +* [rtve:alacarta] Fix extraction (#14290) ++ [yahoo] Add support for custom brightcove embeds (#14210) ++ [generic] Add support for Video.js embeds ++ [gfycat] Add support for /gifs/detail URLs (#14322) +* [generic] Fix infinite recursion for twitter:player URLs (#14339) +* [xhamsterembed] Fix extraction (#14308) + + +version 2017.09.24 + +Core ++ [options] Accept lrc as a subtitle conversion target format (#14292) +* [utils] Fix handling of raw TTML subtitles (#14191) + +Extractors +* [24video] Fix timestamp extraction and make non fatal (#14295) ++ [24video] Add support for 24video.adult (#14295) ++ [kakao] Add support for tv.kakao.com (#12298, #14007) ++ [twitter] Add support for URLs without user id (#14270) ++ [americastestkitchen] Add support for americastestkitchen.com (#10764, + #13996) +* [generic] Fix support for multiple HTML5 videos on one page (#14080) +* [mixcloud] Fix extraction (#14088, #14132) ++ [lynda] Add support for educourse.ga (#14286) +* [beeg] Fix extraction (#14275) +* [nbcsports:vplayer] Correct theplatform URL (#13873) +* [twitter] Fix duration extraction (#14141) +* [tvplay] Bypass geo restriction ++ [heise] Add support for YouTube embeds (#14109) ++ [popcorntv] Add support for popcorntv.it (#5914, #14211) +* [viki] Update app data 
(#14181) +* [morningstar] Relax URL regular expression (#14222) +* [openload] Fix extraction (#14225, #14257) +* [noovo] Fix extraction (#14214) +* [dailymotion:playlist] Relax URL regular expression (#14219) ++ [twitch] Add support for go.twitch.tv URLs (#14215) +* [vgtv] Relax URL regular expression (#14223) + + +version 2017.09.15 + +Core +* [downloader/fragment] Restart inconsistent incomplete fragment downloads + (#13731) +* [YoutubeDL] Download raw subtitles files (#12909, #14191) + +Extractors +* [condenast] Fix extraction (#14196, #14207) ++ [orf] Add support for f4m stories +* [tv4] Relax URL regular expression (#14206) +* [animeondemand] Bypass geo restriction ++ [animeondemand] Add support for flash videos (#9944) + + +version 2017.09.11 + +Extractors +* [rutube:playlist] Fix suitable (#14166) + + +version 2017.09.10 + +Core ++ [utils] Introduce bool_or_none +* [YoutubeDL] Ensure dir existence for each requested format (#14116) + +Extractors +* [fox] Fix extraction (#14147) +* [rutube] Use bool_or_none +* [rutube] Rework and generalize playlist extractors (#13565) ++ [rutube:playlist] Add support for playlists (#13534, #13565) ++ [radiocanada] Add fallback for title extraction (#14145) +* [vk] Use dedicated YouTube embeds extraction routine +* [vice] Use dedicated YouTube embeds extraction routine +* [cracked] Use dedicated YouTube embeds extraction routine +* [chilloutzone] Use dedicated YouTube embeds extraction routine +* [abcnews] Use dedicated YouTube embeds extraction routine +* [youtube] Separate methods for embeds extraction +* [redtube] Fix formats extraction (#14122) +* [arte] Relax unavailability check (#14112) ++ [manyvids] Add support for preview videos from manyvids.com (#14053, #14059) +* [vidme:user] Relax URL regular expression (#14054) +* [bpb] Fix extraction (#14043, #14086) +* [soundcloud] Fix download URL with private tracks (#14093) +* [aliexpress:live] Add support for live.aliexpress.com (#13698, #13707) +* [viidea] Capture and output lecture error message (#14099) +* [radiocanada] Skip unsupported platforms (#14100) + + +version 2017.09.02 + +Extractors +* [youtube] Force old layout for each webpage (#14068, #14072, #14074, #14076, + #14077, #14079, #14082, #14083, #14094, #14095, #14096) +* [youtube] Fix upload date extraction (#14065) ++ [charlierose] Add support for episodes (#14062) ++ [bbccouk] Add support for w-prefixed ids (#14056) +* [googledrive] Extend URL regular expression (#9785) ++ [googledrive] Add support for source format (#14046) +* [pornhd] Fix extraction (#14005) + + +version 2017.08.27.1 + +Extractors + +* [youtube] Fix extraction with --youtube-skip-dash-manifest enabled (#14037) + + +version 2017.08.27 + +Core ++ [extractor/common] Extract height and format id for HTML5 videos (#14034) +* [downloader/http] Rework HTTP downloader (#506, #809, #2849, #4240, #6023, + #8625, #9483) + * Simplify code and split into separate routines to facilitate maintenance + * Make retry mechanism work on errors during the actual download, not only + during the connection establishment phase + * Retry on ECONNRESET and ETIMEDOUT during reading data from network + * Retry on content too short + * Show error description on retry (see the sketch after this section) + +Extractors +* [generic] Lower preference for extraction from LD-JSON +* [rai] Fix audio formats extraction (#14024) +* [youtube] Fix controversy videos extraction (#14027, #14029) +* [mixcloud] Fix extraction (#14015, #14020)
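The downloader/http rework in 2017.08.27 above retries on errors hit while reading the response body (and on short content), not only on connection failures. A minimal sketch of that retry shape, assuming hypothetical establish_connection/download callables; this is illustrative, not the actual youtube_dl/downloader/http.py code:

    import errno
    import socket

    RETRYABLE = (errno.ECONNRESET, errno.ETIMEDOUT)

    def download_with_retries(establish_connection, download, retries=10):
        for count in range(retries):
            try:
                ctx = establish_connection()
                return download(ctx)  # may also raise mid-transfer
            except socket.error as err:
                if err.args[0] not in RETRYABLE:
                    raise
                # show the error description on every retry
                print('[download] Got error: %s. Retrying (%d of %d)...'
                      % (err, count + 1, retries))
        raise IOError('Giving up after %d retries' % retries)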
DASH extraction in_parse_html5_media_entries + non fatal (#13970) +* [utils] Fix unescapeHTML for misformed string like "&a"" (#13935) + +Extractors +* [cbc:watch] Bypass geo restriction (#13993) +* [toutv] Relax DRM check (#13994) ++ [googledrive] Add support for subtitles (#13619, #13638) +* [pornhub] Relax uploader regular expression (#13906, #13975) +* [bandcamp:album] Extract track titles (#13962) ++ [bbccouk] Add support for events URLs (#13893) ++ [liveleak] Support multi-video pages (#6542) ++ [liveleak] Support another liveleak embedding pattern (#13336) +* [cda] Fix extraction (#13935) ++ [laola1tv] Add support for tv.ittf.com (#13965) +* [mixcloud] Fix extraction (#13958, #13974, #13980, #14003) + + +version 2017.08.18 + +Core +* [YoutubeDL] Sanitize byte string format URLs (#13951) ++ [extractor/common] Add support for float durations in _parse_mpd_formats + (#13919) + +Extractors +* [arte] Detect unavailable videos (#13945) +* [generic] Convert redirect URLs to unicode strings (#13951) +* [udemy] Fix paid course detection (#13943) +* [pluralsight] Use RPC API for course extraction (#13937) ++ [clippit] Add support for clippituser.tv ++ [qqmusic] Support new URL schemes (#13805) +* [periscope] Renew HLS extraction (#13917) +* [mixcloud] Extract decrypt key + + +version 2017.08.13 + +Core +* [YoutubeDL] Make sure format id is not empty +* [extractor/common] Make _family_friendly_search optional +* [extractor/common] Respect source's type attribute for HTML5 media (#13892) + +Extractors +* [pornhub:playlistbase] Skip videos from drop-down menu (#12819, #13902) ++ [fourtube] Add support pornerbros.com (#6022) ++ [fourtube] Add support porntube.com (#7859, #13901) ++ [fourtube] Add support fux.com +* [limelight] Improve embeds detection (#13895) ++ [reddit] Add support for v.redd.it and reddit.com (#13847) +* [aparat] Extract all formats (#13887) +* [mixcloud] Fix play info decryption (#13885) ++ [generic] Add support for vzaar embeds (#13876) + + +version 2017.08.09 + +Core +* [utils] Skip missing params in cli_bool_option (#13865) + +Extractors +* [xxxymovies] Fix title extraction (#13868) ++ [nick] Add support for nick.com.pl (#13860) +* [mixcloud] Fix play info decryption (#13867) +* [20min] Fix embeds extraction (#13852) +* [dplayit] Fix extraction (#13851) ++ [niconico] Support videos with multiple formats (#13522) ++ [niconico] Support HTML5-only videos (#13806) + + +version 2017.08.06 + +Core +* Use relative paths for DASH fragments (#12990) + +Extractors +* [pluralsight] Fix format selection +- [mpora] Remove extractor (#13826) ++ [voot] Add support for voot.com (#10255, #11644, #11814, #12350, #13218) +* [vlive:channel] Limit number of videos per page to 100 (#13830) +* [podomatic] Extend URL regular expression (#13827) +* [cinchcast] Extend URL regular expression +* [yandexdisk] Relax URL regular expression (#13824) +* [vidme] Extract DASH and HLS formats +- [teamfour] Remove extractor (#13782) +* [pornhd] Fix extraction (#13783) +* [udemy] Fix subtitles extraction (#13812) +* [mlb] Extend URL regular expression (#13740, #13773) ++ [pbs] Add support for new URL schema (#13801) +* [nrktv] Update API host (#13796) + + +version 2017.07.30.1 + +Core +* [downloader/hls] Use redirect URL as manifest base (#13755) +* [options] Correctly hide login info from debug outputs (#13696) + +Extractors ++ [watchbox] Add support for watchbox.de (#13739) +- [clipfish] Remove extractor ++ [youjizz] Fix extraction (#13744) ++ [generic] Add support for another ooyala embed pattern (#13727) 
++ [ard] Add support for live streams (#13771) +* [soundcloud] Update client id ++ [soundcloud:trackstation] Add support for track stations (#13733) +* [svtplay] Use geo verification proxy for API request +* [svtplay] Update API URL (#13767) ++ [yandexdisk] Add support for yadi.sk (#13755) ++ [megaphone] Add support for megaphone.fm +* [amcnetworks] Make rating optional (#12453) +* [cloudy] Fix extraction (#13737) ++ [nickru] Add support for nickelodeon.ru +* [mtv] Improve thumbnail extraction +* [nick] Automate geo-restriction bypass (#13711) +* [niconico] Improve error reporting (#13696) + + +version 2017.07.23 + +Core +* [YoutubeDL] Improve default format specification (#13704) +* [YoutubeDL] Do not override id, extractor and extractor_key for + url_transparent entities +* [extractor/common] Fix playlist_from_matches + +Extractors +* [itv] Fix production id extraction (#13671, #13703) +* [vidio] Make duration non fatal and fix typo +* [mtv] Skip missing video parts (#13690) +* [sportbox:embed] Fix extraction ++ [npo] Add support for npo3.nl URLs (#13695) +* [dramafever] Remove video id from title (#13699) ++ [egghead:lesson] Add support for lessons (#6635) +* [funnyordie] Extract more metadata (#13677) +* [youku:show] Fix playlist extraction (#13248) ++ [dispeak] Recognize sevt subdomain (#13276) +* [adn] Improve error reporting (#13663) +* [crunchyroll] Relax series and season regular expression (#13659) ++ [spiegel:article] Add support for nexx iframe embeds (#13029) ++ [nexx:embed] Add support for iframe embeds +* [nexx] Improve JS embed extraction ++ [pearvideo] Add support for pearvideo.com (#13031) + + +version 2017.07.15 + +Core +* [YoutubeDL] Don't expand environment variables in meta fields (#13637) + +Extractors +* [spiegeltv] Delegate extraction to nexx extractor (#13159) ++ [nexx] Add support for nexx.cloud (#10807, #13465) +* [generic] Fix rutube embeds extraction (#13641) +* [karrierevideos] Fix title extraction (#13641) +* [youtube] Don't capture YouTube Red ad for creator meta field (#13621) +* [slideshare] Fix extraction (#13617) ++ [5tv] Add another video URL pattern (#13354, #13606) +* [drtv] Make HLS and HDS extraction non fatal +* [ted] Fix subtitles extraction (#13628, #13629) +* [vine] Make sure the title won't be empty ++ [twitter] Support HLS streams in vmap URLs ++ [periscope] Support pscp.tv URLs in embedded frames +* [twitter] Extract mp4 URLs via mobile API (#12726) +* [niconico] Fix authentication error handling (#12486) +* [giantbomb] Extract m3u8 formats (#13626) ++ [vlive:playlist] Add support for playlists (#13613) + + +version 2017.07.09 + +Core ++ [extractor/common] Add support for AMP tags in _parse_html5_media_entries ++ [utils] Support attributes with no values in get_elements_by_attribute + +Extractors ++ [dailymail] Add support for embeds ++ [joj] Add support for joj.sk (#13268) +* [abc.net.au:iview] Extract more formats (#13492, #13489) +* [egghead:course] Fix extraction (#6635, #13370) ++ [cjsw] Add support for cjsw.com (#13525) ++ [eagleplatform] Add support for referrer protected videos (#13557) ++ [eagleplatform] Add support for another embed pattern (#13557) +* [veoh] Extend URL regular expression (#13601) +* [npo:live] Fix live stream id extraction (#13568, #13605) +* [googledrive] Fix height extraction (#13603) ++ [dailymotion] Add support for new layout (#13580) +- [yam] Remove extractor +* [xhamster] Extract all formats and fix duration extraction (#13593) ++ [xhamster] Add support for new URL schema (#13593) +* [espn] Extend URL regular 
expression (#13244, #13549) +* [kaltura] Fix typo in subtitles extraction (#13569) +* [vier] Adapt extraction to redesign (#13575) + + +version 2017.07.02 + +Core +* [extractor/common] Improve _json_ld + +Extractors ++ [thisoldhouse] Add more fallbacks for video id +* [thisoldhouse] Fix video id extraction (#13540, #13541) +* [xfileshare] Extend format regular expression (#13536) +* [ted] Fix extraction (#13535) ++ [tastytrade] Add support for tastytrade.com (#13521) +* [dplayit] Relax video id regular expression (#13524) ++ [generic] Extract more generic metadata (#13527) ++ [bbccouk] Capture and output error message (#13501, #13518) +* [cbsnews] Relax video info regular expression (#13284, #13503) ++ [facebook] Add support for plugin video embeds and multiple embeds (#13493) +* [soundcloud] Switch to https for API requests (#13502) +* [pandatv] Switch to https for API and download URLs ++ [pandatv] Add support for https URLs (#13491) ++ [niconico] Support sp subdomain (#13494) + + +version 2017.06.25 + +Core ++ [adobepass] Add support for DIRECTV NOW (mso ATTOTT) (#13472) +* [YoutubeDL] Skip malformed formats for better extraction robustness + +Extractors ++ [wsj] Add support for barrons.com (#13470) ++ [ign] Add another video id pattern (#13328) ++ [raiplay:live] Add support for live streams (#13414) ++ [redbulltv] Add support for live videos and segments (#13486) ++ [onetpl] Add support for videos embedded via pulsembed (#13482) +* [ooyala] Make more robust +* [ooyala] Skip empty format URLs (#13471, #13476) +* [hgtv.com:show] Fix typo + + +version 2017.06.23 + +Core +* [adobepass] Fix extraction on older Python 2.6 + +Extractors +* [youtube] Adapt to new automatic captions rendition (#13467) +* [hgtv.com:show] Relax video config regular expression (#13279, #13461) +* [drtuber] Fix formats extraction (#12058) +* [youporn] Fix upload date extraction +* [youporn] Improve formats extraction +* [youporn] Fix title extraction (#13456) +* [googledrive] Fix formats sorting (#13443) +* [watchindianporn] Fix extraction (#13411, #13415) ++ [vimeo] Add fallback mp4 extension for original format ++ [ruv] Add support for ruv.is (#13396) +* [viu] Fix extraction on older Python 2.6 +* [pandora.tv] Fix upload_date extraction (#12846) ++ [asiancrush] Add support for asiancrush.com (#13420) + + +version 2017.06.18 + +Core +* [downloader/common] Use utils.shell_quote for debug command line +* [utils] Use compat_shlex_quote in shell_quote +* [postprocessor/execafterdownload] Encode command line (#13407) +* [compat] Fix compat_shlex_quote on Windows (#5889, #10254) +* [postprocessor/metadatafromtitle] Fix processing of missing optional meta fields + in --metadata-from-title (#13408) +* [extractor/common] Fix json dumping with --geo-bypass ++ [extractor/common] Improve jwplayer subtitles extraction ++ [extractor/common] Improve jwplayer formats extraction (#13379) + +Extractors +* [polskieradio] Fix extraction (#13392) ++ [xfileshare] Add support for fastvideo.me (#13385) +* [bilibili] Fix extraction of videos with double quotes in titles (#13387) +* [4tube] Fix extraction (#13381, #13382) ++ [disney] Add support for disneychannel.de (#13383) +* [npo] Improve URL regular expression (#13376) ++ [corus] Add support for showcase.ca ++ [corus] Add support for history.ca (#13359) + + +version 2017.06.12 + +Core +* [utils] Handle compat_HTMLParseError in extract_attributes (#13349) ++ [compat] Introduce compat_HTMLParseError +* [utils] Improve unified_timestamp +* [extractor/generic] Ensure format id is unicode 
string +* [extractor/common] Return unicode string from _match_id ++ [YoutubeDL] Sanitize more fields (#13313) + +Extractors ++ [xfileshare] Add support for rapidvideo.tv (#13348) +* [xfileshare] Modernize and pass Referer ++ [rutv] Add support for testplayer.vgtrk.com (#13347) ++ [newgrounds] Extract more metadata (#13232) ++ [newgrounds:playlist] Add support for playlists (#10611) +* [newgrounds] Improve formats and uploader extraction (#13346) +* [msn] Fix formats extraction +* [turbo] Ensure format id is string +* [sexu] Ensure height is int +* [jove] Ensure comment count is int +* [golem] Ensure format id is string +* [gfycat] Ensure filesize is int +* [foxgay] Ensure height is int +* [flickr] Ensure format id is string +* [sohu] Fix numeric fields +* [safari] Improve authentication detection (#13319) +* [liveleak] Ensure height is int (#13313) +* [streamango] Make title optional (#13292) +* [rtlnl] Improve URL regular expression (#13295) +* [tvplayer] Fix extraction (#13291) + + +version 2017.06.05 + +Core +* [YoutubeDL] Don't emit ANSI escape codes on Windows (#13270) + +Extractors ++ [bandcamp:weekly] Add support for bandcamp weekly (#12758) +* [pornhub:playlist] Fix extraction (#13281) +- [godtv] Remove extractor (#13175) +* [safari] Fix typo (#13252) +* [youtube] Improve chapters extraction (#13247) +* [1tv] Lower preference for HTTP formats (#13246) +* [francetv] Relax URL regular expression +* [drbonanza] Fix extraction (#13231) +* [packtpub] Fix authentication (#13240) + + +version 2017.05.29 + +Extractors +* [youtube] Fix DASH MPD extraction for videos with non-encrypted format URLs + (#13211) +* [xhamster] Fix uploader and like/dislike count extraction (#13216) ++ [xhamster] Extract categories (#11728) ++ [abcnews] Add support for embed URLs (#12851) +* [gaskrank] Fix extraction (#12493) +* [medialaan] Fix videos with missing videoUrl (#12774) +* [dvtv] Fix playlist support ++ [dvtv] Add support for DASH and HLS formats (#3063) ++ [beam:vod] Add support for beam.pro/mixer.com VODs (#13032) +* [cbsinteractive] Relax URL regular expression (#13213) +* [adn] Fix formats extraction ++ [youku] Extract more metadata (#10433) +* [cbsnews] Fix extraction (#13205) + + +version 2017.05.26 + +Core ++ [utils] Recognize more patterns in strip_jsonp +* [postprocessor/ffmpeg] Fix metadata filename handling on Python 2 (#13182) + +Extractors ++ [youtube] Recognize DASH MPDs with cipher signatures (#11381) ++ [bbc] Add support for authentication +* [tudou] Merge into youku extractor (#12214) +* [youku:show] Fix extraction +* [youku] Fix extraction (#13191) +* [udemy] Fix extraction for outputs' format entries without URL (#13192) +* [vimeo] Fix formats' sorting (#13189) +* [cbsnews] Fix extraction for 60 Minutes videos (#12861) + + +version 2017.05.23 + +Core ++ [downloader/external] Pass -loglevel to ffmpeg downloader (#13183) ++ [adobepass] Add support for Bright House Networks (#13149) + +Extractors ++ [streamcz] Add support for subtitles (#13174) +* [youtube] Fix DASH manifest signature decryption (#8944, #13156) +* [toggle] Relax URL regular expression (#13172) +* [toypics] Fix extraction (#13077) +* [njpwworld] Fix extraction (#13162, #13169) ++ [hitbox] Add support for smashcast.tv (#13154) +* [mitele] Update app key regular expression (#13158) + + +version 2017.05.18.1 + +Core +* [jsinterp] Fix typo and clean up regular expressions (#13134) + + +version 2017.05.18 + +Core ++ [jsinterp] Add support for quoted names and indexers (#13123, #13124, #13125, + #13126, #13128, 
#13129, #13130, #13131, #13132) ++ [extractor/common] Add support for schemeless URLs in _extract_wowza_formats + (#13088, #13092) ++ [utils] Recognize more audio codecs (#13081) + +Extractors ++ [vier] Extract more metadata (#12539) +* [vier] Improve extraction (#12801) + + Add support for authentication + * Bypass authentication when no credentials provided + * Improve extraction robustness +* [dailymail] Fix sources extraction (#13057) +* [dailymotion] Extend URL regular expression (#13079) + + +version 2017.05.14 + +Core ++ [extractor/common] Respect Width and Height attributes in ISM manifests ++ [postprocessor/metadatafromtitle] Add support for regular expression syntax for + --metadata-from-title (#13065) + +Extractors ++ [mediaset] Add support for video.mediaset.it (#12708, #12964) +* [orf:radio] Fix extraction (#11643, #12926) +* [aljazeera] Extend URL regular expression (#13053) +* [imdb] Relax URL regular expression (#13056) ++ [francetv] Add support for mobile.france.tv (#13068) ++ [upskill] Add support for upskillcourses.com (#13043) +* [thescene] Fix extraction (#13061) +* [condenast] Improve embed support +* [liveleak] Fix extraction (#12053) ++ [douyu] Support Douyu shows (#12228) +* [myspace] Improve URL regular expression (#13040) +* [adultswim] Use desktop platform in assets URL (#13041) + + +version 2017.05.09 + +Core +* [YoutubeDL] Force --restrict-filenames when no locale is set on all Python + versions (#13027) + +Extractors +* [francetv] Adapt to site redesign (#13034) ++ [packtpub] Add support for authentication (#12622) +* [drtv] Lower preference for SignLanguage formats (#13013, #13016) ++ [cspan] Add support for brightcove live embeds (#13028) +* [vrv] Extract DASH formats and subtitles +* [funimation] Fix authentication (#13021) +* [adultswim] Fix extraction (#8640, #10950, #11042, #12121) + + Add support for Adobe Pass authentication + + Add support for live streams + + Add support for show pages +* [turner] Extract thumbnail, is_live and strip description ++ [nonktube] Add support for nonktube.com (#8647, #13024) ++ [nuevo] Pass headers to _extract_nuevo +* [nbc] Improve extraction (#12364) + + +version 2017.05.07 + +Core +* [extractor/common] Fix typo in _extract_akamai_formats ++ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata ++ [extractor/common] Introduce chapters meta field + +Extractors +* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995, + #13003) +* [bilibili] Fix video downloading (#13001) +* [rmcdecouverte] Fix extraction (#12937) +* [theplatform] Extract chapters +* [bandcamp] Fix thumbnail extraction (#12980) +* [pornhub] Extend URL regular expression (#12996) ++ [youtube] Extract chapters ++ [nrk] Extract chapters ++ [vice] Add support for ooyala embeds in article pages ++ [vice] Support vice articles (#12968) +* [vice] Fix extraction for non en_us videos (#12967) +* [gdcvault] Fix extraction for some videos (#12733) +* [pbs] Improve multipart video support (#12981) +* [laola1tv] Fix extraction (#12880) ++ [cda] Support birthday verification (#12789) +* [leeco] Fix extraction (#12974) ++ [pbs] Extract chapters +* [amp] Improve thumbnail and subtitles extraction +* [foxsports] Fix extraction (#12945) +- [coub] Remove comment count extraction (#12941) + + +version 2017.05.01 + +Core ++ [extractor/common] Extract view count from JSON-LD +* [utils] Improve unified_timestamp ++ [utils] Add video/mp2t to mimetype2ext +* [downloader/external] Properly handle live stream downloading cancellation + 
(#8932) ++ [utils] Add support for unicode whitespace in clean_html on Python 2 (#12906) + +Extractors +* [infoq] Make audio format extraction non fatal (#12938) +* [brightcove] Allow whitespace around attribute names in embedded code ++ [zaq1] Add support for zaq1.pl (#12693) ++ [xvideos] Extract duration (#12828) +* [vevo] Fix extraction (#12879) ++ [noovo] Add support for noovo.ca (#12792) ++ [washingtonpost] Add support for embeds (#12699) +* [yandexmusic:playlist] Fix extraction for Python 3 (#12888) +* [anvato] Improve extraction (#12913) + * Promote to regular shortcut based extractor + * Add mcp to access key mapping table + * Add support for embeds extraction + * Add support for anvato embeds in generic extractor +* [xtube] Fix extraction for older FLV videos (#12734) +* [tvplayer] Fix extraction (#12908) + + +version 2017.04.28 + +Core ++ [adobepass] Use geo verification headers for all requests +- [downloader/fragment] Remove assert for resume_len when no fragments + downloaded ++ [extractor/common] Add manifest_url for explicit group rendition formats +* [extractor/common] Fix manifest_url for m3u8 formats +- [extractor/common] Don't list master m3u8 playlists in format list (#12832) + +Extractors +* [aenetworks] Fix extraction for shows with single season ++ [go] Add support for Disney, DisneyJunior and DisneyXD show pages +* [youtube] Recognize new locale-based player URLs (#12885) ++ [streamable] Add support for new embedded URL schema (#12844) +* [arte:+7] Relax URL regular expression (#12837) + + +version 2017.04.26 + +Core +* Introduce --keep-fragments for keeping fragments of fragmented download + on disk after download is finished +* [YoutubeDL] Fix output template for missing timestamp (#12796) +* [socks] Handle cases where credentials are required but missing +* [extractor/common] Improve HLS extraction (#12211) + * Extract m3u8 parsing to separate method + * Improve rendition groups extraction + * Build stream name according to stream GROUP-ID + * Ignore reference to AUDIO group without URI when stream has no CODECS + * Use float for scaled tbr in _parse_m3u8_formats +* [utils] Add support for TTML styles in dfxp2srt +* [downloader/hls] No need to download keys for fragments that have been + already downloaded +* [downloader/fragment] Improve fragment downloading + * Resume immediately + * Don't concatenate fragments and decrypt them on every resume + * Optimize disk storage usage, don't store intermediate fragments on disk + * Store bookkeeping download state file ++ [extractor/common] Add support for multiple getters in try_get (example below) ++ [extractor/common] Add support for video of WebPage context in _json_ld + (#12778) ++ [extractor/common] Relax JWPlayer regular expression and remove + duplicate URLs (#12768) + +Extractors +* [iqiyi] Fix extraction of Yule videos +* [vidio] Improve extraction and sort formats ++ [brightcove] Match only video elements with data-video-id attribute +* [iqiyi] Fix playlist detection (#12504) +- [azubu] Remove extractor (#12813) +* [porn91] Fix extraction (#12814) +* [vidzi] Fix extraction (#12793) ++ [amp] Extract error message (#12795) ++ [xfileshare] Add support for gorillavid.com and daclips.com (#12776) +* [instagram] Fix extraction (#12777) ++ [generic] Support Brightcove videos in <iframe> (#12482) ++ [brightcove] Support URLs with bcpid instead of playerID (#12482) +* [brightcove] Fix _extract_url (#12782) ++ [odnoklassniki] Extract HLS formats
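The try_get change in 2017.04.26 above allows passing several getters at once; the first getter that neither raises nor fails the expected_type check wins. For example, with a made-up meta dict:

    from youtube_dl.compat import compat_str
    from youtube_dl.utils import try_get

    meta = {'fallbackTitle': 'Example title'}  # hypothetical API response
    title = try_get(meta, (
        lambda x: x['video']['titles'][0],  # raises KeyError here...
        lambda x: x['fallbackTitle'],       # ...so this getter is used
    ), compat_str)                          # -> 'Example title'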
+version 2017.04.17 + +Extractors +* [limelight] Improve extraction of LimelightEmbeddedPlayerFlash media embeds and + add support for channel and channelList embeds +* [generic] Extract multiple Limelight embeds (#12761) ++ [itv] Extract series metadata +* [itv] Fix RTMP formats downloading (#12759) +* [itv] Use native HLS downloader by default ++ [go90] Extract subtitles (#12752) ++ [go90] Extract series metadata (#12752) + + +version 2017.04.16 + +Core +* [YoutubeDL] Apply expand_path after output template substitution ++ [YoutubeDL] Propagate overridden meta fields to extraction results of type + url (#11163) + +Extractors ++ [generic] Extract RSS entries as url_transparent (#11163) ++ [streamango] Add support for streamango.com (#12643) ++ [wsj:article] Add support for articles (#12558) +* [brightcove] Relax video tag embeds extraction and validate ambiguous embeds' + URLs (#9163, #12005, #12178, #12480) ++ [udemy] Add support for react rendition (#12744) + + +version 2017.04.15 + +Extractors +* [youku] Fix fileid extraction (#12741, #12743) + + +version 2017.04.14 + +Core ++ [downloader/hls] Add basic support for EXT-X-BYTERANGE tag (#10955) ++ [adobepass] Improve Comcast and Verizon login code (#10803) ++ [adobepass] Add support for Verizon (#10803) + +Extractors ++ [aenetworks] Add support for specials (#12723) ++ [hbo] Extract HLS formats ++ [go90] Add support for go90.com (#10127) ++ [tv2hu] Add support for tv2.hu (#10509) ++ [generic] Exclude URLs with xml ext from valid video URLs (#10768, #11654) +* [youtube] Improve HLS formats extraction +* [afreecatv] Fix extraction for videos with different key layout (#12718) +- [youtube] Remove explicit preference for audio-only and video-only formats in + order not to break sorting when new formats appear +* [canalplus] Bypass geo restriction + + +version 2017.04.11 + +Extractors +* [afreecatv] Fix extraction (#12706) ++ [generic] Add support for <object> YouTube embeds (#12637) +* [bbccouk] Treat bitrate as audio+video bitrate in media selector ++ [bbccouk] Skip unrecognized formats in media selector (#12701) ++ [bbccouk] Add support for https protocol in media selector (#12701) +* [curiositystream] Fix extraction (#12638) +* [adn] Update subtitle decryption key +* [chaturbate] Fix extraction (#12665, #12688, #12690) + + +version 2017.04.09 + +Extractors ++ [medici] Add support for medici.tv (#3406) ++ [rbmaradio] Add support for redbullradio.com URLs (#12687) ++ [npo:live] Add support for default URL (#12555) +* [mixcloud:playlist] Fix title, description and view count extraction (#12582) ++ [thesun] Add support for thesun.co.uk (#11298, #12674) ++ [ceskatelevize:porady] Add support for porady (#7411, #12645) +* [ceskatelevize] Improve extraction and remove URL replacement hacks ++ [kaltura] Add support for iframe embeds (#12679) +* [airmozilla] Fix extraction (#12670) +* [wshh] Extract HTML5 entries and delegate to generic extractor (#12676) ++ [raiplay] Extract subtitles ++ [xfileshare] Add support for vidlo.us (#12660) ++ [xfileshare] Add support for vidbom.com (#12661) ++ [aenetworks] Add more video URL regular expressions (#12657) ++ [odnoklassniki] Fix format sorting for 1080p quality ++ [rtl2] Add support for you.rtl2.de (#10257) ++ [vshare] Add support for vshare.io (#12278) + + +version 2017.04.03 + +Core ++ [extractor/common] Add censorship check for TransTelekom ISP +* [extractor/common] Move censorship checks to a separate method + +Extractors ++ [discoveryvr] Add support for discoveryvr.com (#12578) ++ [tv5mondeplus] Add support for tv5mondeplus.com (#11386) ++ [periscope] Add support for 
pscp.tv URLs (#12618, #12625) + + +version 2017.04.02 + +Core +* [YoutubeDL] Return early when extraction of url_transparent fails + +Extractors +* [rai] Fix and improve extraction (#11790) ++ [vrv] Add support for series pages +* [limelight] Improve extraction for audio only formats +* [funimation] Fix extraction (#10696, #11773) ++ [xfileshare] Add support for vidabc.com (#12589) ++ [xfileshare] Improve extraction and extract HLS formats ++ [crunchyroll] Pass geo verification proxy ++ [cwtv] Extract ISM formats ++ [tvplay] Bypass geo restriction ++ [vrv] Add support for vrv.co ++ [packtpub] Add support for packtpub.com (#12610) ++ [generic] Pass base_url to _parse_jwplayer_data ++ [adn] Add support for animedigitalnetwork.fr (#4866) ++ [allocine] Extract more metadata +* [allocine] Fix extraction (#12592) +* [openload] Fix extraction + + +version 2017.03.26 + +Core +* Don't raise an error if JWPlayer config data is not a JavaScript object + literal. _find_jwplayer_data now returns a dict rather than a str. (#12307) +* Expand environment variables for options representing paths (#12556) ++ [utils] Introduce expand_path +* [downloader/hls] Delegate downloading to ffmpeg immediately for live streams + +Extractors +* [afreecatv] Fix extraction (#12179) ++ [atvat] Add support for atv.at (#5325) ++ [fox] Add metadata extraction (#12391) ++ [atresplayer] Extract DASH formats ++ [atresplayer] Extract HD manifest (#12548) +* [atresplayer] Fix login error detection (#12548) +* [franceculture] Fix extraction (#12547) +* [youtube] Improve URL regular expression (#12538) +* [generic] Do not follow redirects to the same URL + + +version 2017.03.24 + +Extractors +- [9c9media] Remove mp4 URL extraction request ++ [bellmedia] Add support for etalk.ca and space.ca (#12447) +* [channel9] Fix extraction (#11323) +* [cloudy] Fix extraction (#12525) ++ [hbo] Add support for free episode URLs and new formats extraction (#12519) +* [condenast] Fix extraction and style (#12526) +* [viu] Relax URL regular expression (#12529) + + +version 2017.03.22 + +Extractors +- [pluralsight] Omit module title from video title (#12506) +* [pornhub] Decode obfuscated video URL (#12470, #12515) +* [senateisvp] Allow https URL scheme for embeds (#12512) + + +version 2017.03.20 + +Core ++ [YoutubeDL] Allow multiple input URLs to be used with stdout (-) as + output template ++ [adobepass] Detect and output error on authz token extraction (#12472) + +Extractors ++ [bostonglobe] Add extractor for bostonglobe.com (#12099) ++ [toongoggles] Add support for toongoggles.com (#12171) ++ [medialaan] Add support for Medialaan sites (#9974, #11912) ++ [discoverynetworks] Add support for more domains and bypass geo restriction +* [openload] Fix extraction (#10408) + + +version 2017.03.16 + +Core ++ [postprocessor/ffmpeg] Add support for flac ++ [extractor/common] Extract SMIL formats from jwplayer + +Extractors ++ [generic] Add forgotten return for jwplayer formats +* [redbulltv] Improve extraction + + +version 2017.03.15 + +Core +* Fix missing subtitles if --add-metadata is used (#12423) + +Extractors +* [facebook] Make title optional (#12443) ++ [mitele] Add support for ooyala videos (#12430) +* [openload] Fix extraction (#12435, #12446) +* [streamable] Update API URL (#12433) ++ [crunchyroll] Extract season name (#12428) +* [discoverygo] Bypass geo restriction ++ [discoverygo:playlist] Add support for playlists (#12424) + + +version 2017.03.10 + +Extractors +* [generic] Make title optional for jwplayer embeds (#12410) +* [wdr:maus] Fix 
extraction (#12373) +* [prosiebensat1] Improve title extraction (#12318, #12327) +* [dplayit] Separate and rewrite extractor and bypass geo restriction (#12393) +* [miomio] Fix extraction (#12291, #12388, #12402) +* [telequebec] Fix description extraction (#12399) +* [openload] Fix extraction (#12357) +* [brightcove:legacy] Relax videoPlayer validation check (#12381) + + +version 2017.03.07 + +Core +* Metadata is now added after conversion (#5594) + +Extractors +* [soundcloud] Update client id (#12376) +* [openload] Fix extraction (#10408, #12357) + + +version 2017.03.06 + +Core ++ [utils] Process bytestrings in urljoin (#12369) +* [extractor/common] Improve height extraction and extract bitrate +* [extractor/common] Move jwplayer formats extraction into a separate method ++ [external:ffmpeg] Limit test download size to 10KiB (#12362) + +Extractors ++ [drtv] Add geo countries to GeoRestrictedError ++ [drtv:live] Bypass geo restriction ++ [tunepk] Add extractor (#12197, #12243) + + +version 2017.03.05 + +Extractors ++ [twitch] Add basic support for two-factor authentication (#11974) ++ [vier] Add support for vijf.be (#12304) ++ [redbulltv] Add support for redbull.tv (#3919, #11948) +* [douyutv] Switch to the PC API to escape the 5-min limitation (#12316) ++ [generic] Add support for rutube embeds ++ [rutube] Relax URL regular expression ++ [vrak] Add support for vrak.tv (#11452) ++ [brightcove:new] Add ability to smuggle geo_countries into URL ++ [brightcove:new] Raise GeoRestrictedError +* [go] Relax URL regular expression (#12341) +* [24video] Use original host for requests (#12339) +* [ruutu] Disable DASH formats (#12322) + + +version 2017.03.02 + +Core ++ [adobepass] Add support for Charter Spectrum (#11465) +* [YoutubeDL] Don't sanitize identifiers in output template (#12317) + +Extractors +* [facebook] Fix extraction (#12323, #12330) +* [youtube] Mark errors about rental videos as expected (#12324) ++ [npo] Add support for audio +* [npo] Adapt to app.php API (#12311, #12320) + + +version 2017.02.28 + +Core ++ [utils] Add bytes_to_long and long_to_bytes ++ [utils] Add pkcs1pad ++ [aes] Add aes_cbc_encrypt (usage sketch below) + +Extractors ++ [azmedien:showplaylist] Add support for show playlists (#12160) ++ [youtube:playlist] Recognize another playlist pattern (#11928, #12286) ++ [daisuki] Add support for daisuki.net (#2486, #3186, #4738, #6175, #7776, + #10060) +* [douyu] Fix extraction (#12301) + + +version 2017.02.27 + +Core +* [downloader/common] Limit sleep interval message to 2 digits after the + decimal point (#12183) ++ [extractor/common] Add preference to _parse_html5_media_entries + +Extractors ++ [npo] Add support for zapp.nl ++ [npo] Add support for hetklokhuis.nl (#12293) +- [scivee] Remove extractor (#9315) ++ [cda] Decode download URL (#12255) ++ [crunchyroll] Improve uploader extraction (#12267) ++ [youtube] Raise GeoRestrictedError ++ [dailymotion] Raise GeoRestrictedError ++ [mdr] Recognize more URL patterns (#12169) ++ [tvigle] Raise GeoRestrictedError +* [vevo] Fix extraction for videos with the new streams/streamsV3 format + (#11719) ++ [freshlive] Add support for freshlive.tv (#12175) ++ [xhamster] Capture and output videoClosed error (#12263) ++ [etonline] Add support for etonline.com (#12236) ++ [njpwworld] Add support for njpwworld.com (#11561) +* [amcnetworks] Relax URL regular expression (#12127) + + +version 2017.02.24.1 + +Extractors +* [noco] Modernize +* [noco] Switch login URL to https (#12246) ++ [thescene] Extract more metadata +* [thescene] Fix extraction (#12235) ++ [tubitv] Use geo bypass mechanism +* [openload] Fix extraction (#10408) ++ [ivi] Raise GeoRestrictedError
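The helpers added under 2017.02.28 above (bytes_to_long/long_to_bytes, pkcs1pad and aes_cbc_encrypt) combine roughly as follows; a sketch of a daisuki-style handshake in which n and e are a toy RSA key, not real values:

    import os

    from youtube_dl.aes import aes_cbc_encrypt
    from youtube_dl.utils import (
        bytes_to_intlist, bytes_to_long, intlist_to_bytes, long_to_bytes,
        pkcs1pad)

    n, e = 3233, 17  # placeholder RSA modulus/exponent, illustration only
    aes_key = bytes_to_intlist(os.urandom(16))  # the aes helpers take int lists
    iv = bytes_to_intlist(os.urandom(16))

    # encrypt the payload with AES-CBC...
    ciphertext = intlist_to_bytes(
        aes_cbc_encrypt(bytes_to_intlist(b'payload'), aes_key, iv))
    # ...and RSA-encrypt the PKCS#1-padded AES key for the remote side
    rsa_blob = long_to_bytes(
        pow(bytes_to_long(intlist_to_bytes(pkcs1pad(aes_key, 128))), e, n))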
+version 2017.02.24 + +Core +* [options] Hide deprecated options from --help +* [options] Deprecate --autonumber-size ++ [YoutubeDL] Add support for string formatting operations in output template + (#5185, #5748, #6841, #9929, #9966, #9978, #12189) + +Extractors ++ [lynda:course] Add webpage extraction fallback (#12238) +* [go] Sign all uplynk URLs and use geo bypass only for free videos + (#12087, #12210) ++ [skylinewebcams] Add support for skylinewebcams.com (#12221) ++ [instagram] Add support for multi video posts (#12226) ++ [crunchyroll] Extract playlist entries ids +* [mgtv] Fix extraction ++ [sohu] Raise GeoRestrictedError ++ [leeco] Raise GeoRestrictedError and use geo bypass mechanism + + +version 2017.02.22 + +Extractors +* [crunchyroll] Fix descriptions with double quotes (#12124) +* [dailymotion] Make comment count optional (#12209) ++ [vidzi] Add support for vidzi.cc (#12213) ++ [24video] Add support for 24video.tube (#12217) ++ [crackle] Use geo bypass mechanism ++ [viewster] Use geo verification headers ++ [tfo] Improve geo restriction detection and use geo bypass mechanism ++ [telequebec] Use geo bypass mechanism ++ [limelight] Extract PlaylistService errors and improve geo restriction + detection + + +version 2017.02.21 + +Core +* [extractor/common] Allow calling _initialize_geo_bypass from extractors + (#11970) ++ [adobepass] Add support for Time Warner Cable (#12191) ++ [travis] Run tests in parallel ++ [downloader/ism] Honor HTTP headers when downloading fragments ++ [downloader/dash] Honor HTTP headers when downloading fragments ++ [utils] Add GeoUtils class for working with geo tools and GeoUtils.random_ipv4 ++ Add option --geo-bypass-country for explicit geo bypass on behalf of + specified country ++ Add options to control geo bypass mechanism --geo-bypass and --no-geo-bypass ++ Add experimental geo restriction bypass mechanism based on faking + X-Forwarded-For HTTP header (example below) ++ [utils] Introduce GeoRestrictedError for geo restricted videos ++ [utils] Introduce YoutubeDLError base class for all youtube-dl exceptions + +Extractors ++ [ninecninemedia] Use geo bypass mechanism +* [spankbang] Make uploader optional (#12193) ++ [iprima] Improve geo restriction detection and disable geo bypass +* [iprima] Modernize +* [commonmistakes] Disable UnicodeBOM extractor test for Python 3.2 ++ [prosiebensat1] Throw ExtractorError on unsupported page type (#12180) +* [nrk] Update _API_HOST and relax _VALID_URL ++ [tv4] Bypass geo restriction and improve detection +* [tv4] Switch to hls3 protocol (#12177) ++ [viki] Improve geo restriction detection ++ [vgtv] Improve geo restriction detection ++ [srgssr] Improve geo restriction detection ++ [vbox7] Improve geo restriction detection and use geo bypass mechanism ++ [svt] Improve geo restriction detection and use geo bypass mechanism ++ [pbs] Improve geo restriction detection and use geo bypass mechanism ++ [ondemandkorea] Improve geo restriction detection and use geo bypass mechanism ++ [nrk] Improve geo restriction detection and use geo bypass mechanism ++ [itv] Improve geo restriction detection and use geo bypass mechanism ++ [go] Improve geo restriction detection and use geo bypass mechanism ++ [dramafever] Improve geo restriction detection and use geo bypass mechanism +* [brightcove:legacy] Restrict videoPlayer value (#12040) ++ [tvn24] Add support for tvn24.pl and tvn24bis.pl (#11679) ++ [thisav] Add support for HTML5 media (#11771) +* [metacafe] Bypass family filter (#10371) +* [viceland] Improve info extraction
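Under the hood, the experimental bypass introduced in 2017.02.21 above fakes the client address: GeoUtils.random_ipv4 picks a random IPv4 from the target country's blocks and the result is sent as an X-Forwarded-For header. A short illustration ('DE' is just an example country code):

    from youtube_dl.utils import GeoUtils

    fake_ip = GeoUtils.random_ipv4('DE')    # e.g. '53.11.84.170'
    headers = {'X-Forwarded-For': fake_ip}  # what --geo-bypass-country sets up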
+version 2017.02.17 + +Extractors +* [heise] Improve extraction (#9725) +* [ellentv] Improve extraction (#11653) +* [openload] Fix extraction (#10408, #12002) ++ [theplatform] Recognize URLs with whitespaces (#12044) +* [einthusan] Relax URL regular expression (#12141, #12159) ++ [generic] Support complex JWPlayer embedded videos (#12030) +* [elpais] Improve extraction (#12139) + + +version 2017.02.16 + +Core ++ [utils] Add support for quoted string literals in --match-filter (#8050, + #12142, #12144) + +Extractors +* [ceskatelevize] Lower priority for audio description sources (#12119) +* [amcnetworks] Fix extraction (#12127) +* [pinkbike] Fix uploader extraction (#12054) ++ [onetpl] Add support for businessinsider.com.pl and plejada.pl ++ [onetpl] Add support for onet.pl (#10507) ++ [onetmvp] Add shortcut extractor ++ [vodpl] Add support for vod.pl (#12122) ++ [pornhub] Extract video URL from tv platform site (#12007, #12129) ++ [ceskatelevize] Extract DASH formats (#12119, #12133) + + +version 2017.02.14 + +Core +* Fix TypeError with Python 2.7.13 on Windows (#11540, #12085) + +Extractors +* [zdf] Fix extraction (#12117) +* [xtube] Fix extraction for both kinds of video id (#12088) +* [xtube] Improve title extraction (#12088) ++ [lemonde] Fallback delegate extraction to generic extractor (#12115, #12116) +* [bellmedia] Allow video id longer than 6 characters (#12114) ++ [limelight] Add support for referer protected videos +* [disney] Improve extraction (#4975, #11000, #11882, #11936) +* [hotstar] Improve extraction (#12096) +* [einthusan] Fix extraction (#11416) ++ [aenetworks] Add support for lifetimemovieclub.com (#12097) +* [youtube] Fix parsing codecs (#12091) + + +version 2017.02.11 + +Core ++ [utils] Introduce get_elements_by_class and get_elements_by_attribute + utility functions ++ [extractor/common] Skip m3u8 manifests protected with Adobe Flash Access + +Extractors +* [pluralsight:course] Fix extraction (#12075) ++ [bbc] Extract m3u8 formats with 320k audio +* [facebook] Relax video id matching (#11017, #12055, #12056) ++ [corus] Add support for Corus Entertainment sites (#12060, #9164) ++ [pluralsight] Detect blocked account error message (#12070) ++ [bloomberg] Add another video id pattern (#12062) +* [extractor/commonmistakes] Restrict URL regular expression (#12050) ++ [tvplayer] Add support for tvplayer.com + + +version 2017.02.10 + +Extractors +* [xtube] Fix extraction (#12023) +* [pornhub] Fix extraction (#12007, #12018) +* [facebook] Improve JS data regular expression (#12042) +* [kaltura] Improve embed partner id extraction (#12041) ++ [sprout] Add support for sproutonline.com +* [6play] Improve extraction ++ [scrippsnetworks:watch] Add support for Scripps Networks sites (#10765) ++ [go] Add support for Adobe Pass authentication (#11468, #10831) +* [6play] Fix extraction (#12011) ++ [nbc] Add support for Adobe Pass authentication (#12006) + + +version 2017.02.07 + +Core +* [extractor/common] Fix audio only with audio group in m3u8 (#11995) ++ [downloader/fragment] Respect --no-part +* [extractor/common] Speed up HTML5 media entries extraction (#11979) + +Extractors +* [pornhub] Fix extraction (#11997) ++ [canalplus] Add support for cstar.fr (#11990) ++ [extractor/generic] Improve RTMP support (#11993) ++ [gaskrank] Add support for gaskrank.tv (#11685) +* [bandcamp] Fix extraction for incomplete albums (#11727) +* [iwara] Fix extraction (#11781) +* [googledrive] Fix extraction on 
Python 3.6 ++ [videopress] Add support for videopress.com ++ [afreecatv] Extract RTMP formats + + +version 2017.02.04.1 + +Extractors ++ [twitch:stream] Add support for player.twitch.tv (#11971) +* [radiocanada] Fix extraction for toutv RTMP formats + + +version 2017.02.04 + +Core ++ Add --playlist-random to shuffle playlists (#11889, #11901) +* [utils] Improve comments processing in js_to_json (#11947) +* [utils] Handle single-line comments in js_to_json +* [downloader/external:ffmpeg] Minimize the use of aac_adtstoasc filter + +Extractors ++ [piksel] Add another app token pattern (#11969) ++ [vk] Capture and output author blocked error message (#11965) ++ [turner] Fix secure HLS formats downloading with ffmpeg (#11358, #11373, + #11800) ++ [drtv] Add support for live and radio sections (#1827, #3427) +* [myspace] Fix extraction and extract HLS and HTTP formats ++ [youtube] Add format info for itag 325 and 328 +* [vine] Fix extraction (#11955) +- [sportbox] Remove extractor (#11954) ++ [filmon] Add support for filmon.com (#11187) ++ [infoq] Add audio only formats (#11565) +* [douyutv] Improve room id regular expression (#11931) +* [iprima] Fix extraction (#11920, #11896) +* [youtube] Fix ytsearch when cookies are provided (#11924) +* [go] Relax video id regular expression (#11937) +* [facebook] Fix title extraction (#11941) ++ [youtube:playlist] Recognize TL playlists (#11945) ++ [bilibili] Support new Bangumi URLs (#11845) ++ [cbc:watch] Extract audio codec for audio only formats (#11893) ++ [elpais] Fix extraction for some URLs (#11765) + + +version 2017.02.01 + +Extractors ++ [facebook] Add another fallback extraction scenario (#11926) +* [prosiebensat1] Fix extraction of descriptions (#11810, #11929) +- [crunchyroll] Remove ScaledBorderAndShadow settings (#9028) ++ [vimeo] Extract upload timestamp ++ [vimeo] Extract license (#8726, #11880) ++ [nrk:series] Add support for series (#11571, #11711) + + +version 2017.01.31 + +Core ++ [compat] Add compat_etree_register_namespace + +Extractors +* [youtube] Fix extraction for domainless player URLs (#11890, #11891, #11892, + #11894, #11895, #11897, #11900, #11903, #11904, #11906, #11907, #11909, + #11913, #11914, #11915, #11916, #11917, #11918, #11919) ++ [vimeo] Extract both mixed and separated DASH formats ++ [ruutu] Extract DASH formats +* [itv] Fix extraction for Python 2.6 + + +version 2017.01.29 + +Core +* [extractor/common] Fix initialization template (#11605, #11825) ++ [extractor/common] Document fragment_base_url and fragment's path fields +* [extractor/common] Fix duration per DASH segment (#11868) ++ Introduce --autonumber-start option for initial value of %(autonumber)s + template (#727, #2702, #9362, #10457, #10529, #11862) + +Extractors ++ [azmedien:playlist] Add support for topic and themen playlists (#11817) +* [npo] Fix subtitles extraction ++ [itv] Extract subtitles ++ [itv] Add support for itv.com (#9240) ++ [mtv81] Add support for mtv81.com (#7619) ++ [vlive] Add support for channels (#11826) ++ [kaltura] Add fallback for fileExt ++ [kaltura] Improve uploader_id extraction ++ [konserthusetplay] Add support for rspoplay.se (#11828) + + +version 2017.01.28 + +Core +* [utils] Improve parse_duration + +Extractors +* [crunchyroll] Improve series and season metadata extraction (#11832) +* [soundcloud] Improve formats extraction and extract audio bitrate ++ [soundcloud] Extract HLS formats +* [soundcloud] Fix track URL extraction (#11852) ++ [twitch:vod] Expand URL regular expressions (#11846) +* [aenetworks] Fix season episodes 
extraction (#11669) ++ [tva] Add support for videos.tva.ca (#11842) +* [jamendo] Improve and extract more metadata (#11836) ++ [disney] Add support for Disney sites (#7409, #11801, #4975, #11000) +* [vevo] Remove request to old API and catch API v2 errors ++ [cmt,mtv,southpark] Add support for episode URLs (#11837) ++ [youtube] Add fallback for duration extraction (#11841) + + +version 2017.01.25 + +Extractors ++ [openload] Fallback video extension to mp4 ++ [extractor/generic] Add support for Openload embeds (#11536, #11812) +* [srgssr] Fix rts video extraction (#11831) ++ [afreecatv:global] Add support for afreeca.tv (#11807) ++ [crackle] Extract vtt subtitles ++ [crackle] Extract multiple resolutions for thumbnails ++ [crackle] Add support for mobile URLs ++ [konserthusetplay] Extract subtitles (#11823) ++ [konserthusetplay] Add support for HLS videos (#11823) +* [vimeo:review] Fix config URL extraction (#11821) + + +version 2017.01.24 + +Extractors +* [pluralsight] Fix extraction (#11820) ++ [nextmedia] Add support for NextTV (壹電視) +* [24video] Fix extraction (#11811) +* [youtube:playlist] Fix nonexistent and private playlist detection (#11604) ++ [chirbit] Extract uploader (#11809) + + +version 2017.01.22 + +Extractors ++ [pornflip] Add support for pornflip.com (#11556, #11795) +* [chaturbate] Fix extraction (#11797, #11802) ++ [azmedien] Add support for AZ Medien sites (#11784, #11785) ++ [nextmedia] Support redirected URLs ++ [vimeo:channel] Extract videos' titles for playlist entries (#11796) ++ [youtube] Extract episode metadata (#9695, #11774) ++ [cspan] Support Ustream embedded videos (#11547) ++ [1tv] Add support for HLS videos (#11786) +* [uol] Fix extraction (#11770) +* [mtv] Relax triforce feed regular expression (#11766) + + +version 2017.01.18 + +Extractors +* [bilibili] Fix extraction (#11077) ++ [canalplus] Add fallback for video id (#11764) +* [20min] Fix extraction (#11683, #11751) +* [imdb] Extend URL regular expression (#11744) ++ [naver] Add support for tv.naver.com links (#11743) + + +version 2017.01.16 + +Core +* [options] Apply custom config to final composite configuration (#11741) +* [YoutubeDL] Improve automatic protocol determination (#11720) + +Extractors +* [xiami] Relax URL regular expressions +* [xiami] Improve track metadata extraction (#11699) ++ [limelight] Check hand-made direct HTTP links ++ [limelight] Add support for direct HTTP links at video.llnw.net (#11737) ++ [brightcove] Recognize another player ID pattern (#11688) ++ [niconico] Support login via cookies (#7968) +* [yourupload] Fix extraction (#11601) ++ [beam:live] Add support for beam.pro live streams (#10702, #11596) +* [vevo] Improve geo restriction detection ++ [dramafever] Add support for URLs with language code (#11714) +* [cbc] Improve playlist support (#11704) + + +version 2017.01.14 + +Core ++ [common] Add ability to customize akamai manifest host ++ [utils] Add more date formats + +Extractors +- [mtv] Eliminate _transform_rtmp_url +* [mtv] Generalize triforce mgid extraction ++ [cmt] Add support for full episodes and video clips (#11623) ++ [mitele] Extract DASH formats ++ [ooyala] Add support for videos with embedToken (#11684) +* [mixcloud] Fix extraction (#11674) +* [openload] Fix extraction (#10408) +* [tv4] Improve extraction (#11698) +* [freesound] Fix and improve extraction (#11602) ++ [nick] Add support for beta.nick.com (#11655) +* [mtv,cc] Use HLS by default with native HLS downloader (#11641) +* [mtv] Fix non-HLS extraction + + +version 2017.01.10 + +Extractors +* [youtube] Fix
extraction (#11663, #11664) ++ [inc] Add support for inc.com (#11277, #11647) ++ [youtube] Add itag 212 (#11575) ++ [egghead:course] Add support for egghead.io courses + + +version 2017.01.08 + +Core +* Fix "invalid escape sequence" errors under Python 3.6 (#11581) + +Extractors ++ [hitrecord] Add support for hitrecord.org (#10867, #11626) +- [videott] Remove extractor +* [swrmediathek] Improve extraction +- [sharesix] Remove extractor +- [aol:features] Remove extractor +* [sendtonews] Improve info extraction +* [3sat,phoenix] Fix extraction (#11619) +* [comedycentral/mtv] Add support for HLS videos (#11600) +* [discoverygo] Fix JSON data parsing (#11219, #11522) + + +version 2017.01.05 + +Extractors ++ [zdf] Fix extraction (#11055, #11063) +* [pornhub:playlist] Improve extraction (#11594) ++ [cctv] Add support for ncpa-classic.com (#11591) ++ [tunein] Add support for embeds (#11579) + + +version 2017.01.02 + +Extractors +* [cctv] Improve extraction (#879, #6753, #8541) ++ [nrktv:episodes] Add support for episodes (#11571) ++ [arkena] Add support for video.arkena.com (#11568) + + +version 2016.12.31 + +Core ++ Introduce --config-location option for custom configuration files (#6745, + #10648) + +Extractors ++ [twitch] Add support for player.twitch.tv (#11535, #11537) ++ [videa] Add support for videa.hu (#8181, #11133) +* [vk] Fix postlive videos extraction +* [vk] Extract from playerParams (#11555) +- [freevideo] Remove extractor (#11515) ++ [showroomlive] Add support for showroom-live.com (#11458) +* [xhamster] Fix duration extraction (#11549) +* [rtve:live] Fix extraction (#11529) +* [brightcove:legacy] Improve embeds detection (#11523) ++ [twitch] Add support for rechat messages (#11524) +* [acast] Fix audio and timestamp extraction (#11521) + + +version 2016.12.22 + +Core +* [extractor/common] Improve detection of video-only formats in m3u8 + manifests (#11507) + +Extractors ++ [theplatform] Pass geo verification headers to SMIL request (#10146) ++ [viu] Pass geo verification headers to auth request +* [rtl2] Extract more formats and metadata +* [vbox7] Skip malformed JSON-LD (#11501) +* [uplynk] Force downloading using native HLS downloader (#11496) ++ [laola1] Add support for another extraction scenario (#11460) + + +version 2016.12.20 + +Core +* [extractor/common] Improve fragment URL construction for DASH media +* [extractor/common] Fix codec information extraction for mixed audio/video + DASH media (#11490) + +Extractors +* [vbox7] Fix extraction (#11494) ++ [uktvplay] Add support for uktvplay.uktv.co.uk (#11027) ++ [piksel] Add support for player.piksel.com (#11246) ++ [vimeo] Add support for DASH formats +* [vimeo] Fix extraction for HLS formats (#11490) +* [kaltura] Fix wrong widget ID in some cases (#11480) ++ [nrktv:direkte] Add support for live streams (#11488) +* [pbs] Fix extraction for geo restricted videos (#7095) +* [brightcove:new] Skip widevine classic videos ++ [viu] Add support for viu.com (#10607, #11329) + + +version 2016.12.18 + +Core ++ [extractor/common] Recognize DASH formats in html5 media entries + +Extractors ++ [ccma] Add support for ccma.cat (#11359) +* [laola1tv] Improve extraction ++ [laola1tv] Add support for embed URLs (#11460) +* [nbc] Fix extraction for MSNBC videos (#11466) +* [twitch] Adapt to new videos pages URL schema (#11469) ++ [meipai] Add support for meipai.com (#10718) +* [jwplatform] Improve subtitles and duration extraction ++ [ondemandkorea] Add support for ondemandkorea.com (#10772) ++ [vvvvid] Add support for vvvvid.it (#5915) + +
+version 2016.12.15 + +Core ++ [utils] Add convenience urljoin + +Extractors ++ [openload] Recognize oload.tv URLs (#10408) ++ [facebook] Recognize .onion URLs (#11443) +* [vlive] Fix extraction (#11375, #11383) ++ [canvas] Extract DASH formats ++ [melonvod] Add support for vod.melon.com (#11419) + + +version 2016.12.12 + +Core ++ [utils] Add common user agents map ++ [common] Recognize HLS manifests that contain video-only formats (#11394) + +Extractors ++ [dplay] Use Safari user agent for HLS (#11418) ++ [facebook] Detect login required error message +* [facebook] Improve video selection (#11390) ++ [canalplus] Add another video id pattern (#11399) +* [mixcloud] Relax URL regular expression (#11406) +* [ctvnews] Relax URL regular expression (#11394) ++ [rte] Capture and output error message (#7746, #10498) ++ [prosiebensat1] Add support for DASH formats +* [srgssr] Improve extraction for geo restricted videos (#11089) +* [rts] Improve extraction for geo restricted videos (#4989) + + +version 2016.12.09 + +Core +* [socks] Fix error reporting (#11355) + +Extractors +* [openload] Fix extraction (#10408) +* [pandoratv] Fix extraction (#11023) ++ [telebruxelles] Add support for emission URLs +* [telebruxelles] Extract all formats ++ [bloomberg] Add another video id regular expression (#11371) +* [fusion] Update ooyala id regular expression (#11364) ++ [1tv] Add support for playlists (#11335) +* [1tv] Improve extraction (#11335) ++ [aenetworks] Extract more formats (#11321) ++ [thisoldhouse] Recognize /tv-episode/ URLs (#11271) + + +version 2016.12.01 + +Extractors +* [soundcloud] Update client id (#11327) +* [ruutu] Detect DRM protected videos ++ [liveleak] Add support for youtube embeds (#10688) +* [spike] Fix full episodes support (#11312) +* [comedycentral] Fix full episodes support +* [normalboots] Rewrite in terms of JWPlatform (#11184) +* [teamfourstar] Rewrite in terms of JWPlatform (#11184) +- [screenwavemedia] Remove extractor (#11184) + + +version 2016.11.27 + +Extractors ++ [webcaster] Add support for webcaster.pro ++ [azubu] Add support for azubu.uol.com.br (#11305) +* [viki] Prefer hls formats +* [viki] Fix rtmp formats extraction (#11255) +* [puls4] Relax URL regular expression (#11267) +* [vevo] Improve artist extraction (#10911) +* [mitele] Relax URL regular expression and extract more metadata (#11244) ++ [cbslocal] Recognize New York site (#11285) ++ [youtube:playlist] Pass disable_polymer in URL query (#11193) + + +version 2016.11.22 + +Extractors +* [hellporno] Fix video extension extraction (#11247) ++ [hellporno] Add support for hellporno.net (#11247) ++ [amcnetworks] Recognize more BBC America URLs (#11263) +* [funnyordie] Improve extraction (#11208) +* [extractor/generic] Improve limelight embeds support +- [crunchyroll] Remove ScaledBorderAndShadow from ASS subtitles (#8207, #9028) +* [bandcamp] Fix free downloads extraction and extract all formats (#11067) +* [twitter:card] Relax URL regular expression (#11225) ++ [tvanouvelles] Add support for tvanouvelles.ca (#10616) + + +version 2016.11.18 + +Extractors +* [youtube:live] Relax URL regular expression (#11164) +* [openload] Fix extraction (#10408, #11122) +* [vlive] Prefer locale over language for subtitles id (#11203) + + +version 2016.11.14.1 + +Core ++ [downloader/fragment,f4m,hls] Respect HTTP headers from info dict +* [extractor/common] Fix media templates with Bandwidth substitution pattern in + MPD manifests (#11175) +* [extractor/common] Improve thumbnail extraction from JSON-LD + +Extractors ++ [nrk]
Workaround geo restriction ++ [nrk] Improve error detection and messages ++ [afreecatv] Add support for vod.afreecatv.com (#11174) +* [cda] Fix and improve extraction (#10929, #10936) +* [plays] Fix extraction (#11165) +* [eagleplatform] Fix extraction (#11160) ++ [audioboom] Recognize /posts/ URLs (#11149) + + +version 2016.11.08.1 + +Extractors +* [espn:article] Fix support for espn.com articles +* [franceculture] Fix extraction (#11140) + + +version 2016.11.08 + +Extractors +* [tmz:article] Fix extraction (#11052) +* [espn] Fix extraction (#11041) +* [mitele] Fix extraction after website redesign (#10824) +- [ard] Remove age restriction check (#11129) +* [generic] Improve support for pornhub.com embeds (#11100) ++ [generic] Add support for redtube.com embeds (#11099) ++ [generic] Add support for drtuber.com embeds (#11098) ++ [redtube] Add support for embed URLs ++ [drtuber] Add support for embed URLs ++ [yahoo] Improve content id extraction (#11088) +* [toutv] Relax URL regular expression (#11121) + + +version 2016.11.04 + +Core +* [extractor/common] Tolerate malformed RESOLUTION attribute in m3u8 + manifests (#11113) +* [downloader/ism] Fix AVC Decoder Configuration Record + +Extractors ++ [fox9] Add support for fox9.com (#11110) ++ [anvato] Extract more metadata and improve formats extraction +* [vodlocker] Improve removed videos detection (#11106) ++ [vzaar] Add support for vzaar.com (#11093) ++ [vice] Add support for uplynk preplay videos (#11101) +* [tubitv] Fix extraction (#11061) ++ [shahid] Add support for authentication (#11091) ++ [radiocanada] Add subtitles support (#11096) ++ [generic] Add support for ISM manifests + + +version 2016.11.02 + +Core ++ Add basic support for Smooth Streaming protocol (#8118, #10969) +* Improve MPD manifest base URL extraction (#10909, #11079) +* Fix --match-filter for int-like strings (#11082) + +Extractors ++ [mva] Add support for ISM formats ++ [msn] Add support for ISM formats ++ [onet] Add support for ISM formats ++ [tvp] Add support for ISM formats ++ [nicknight] Add support for nicknight sites (#10769) + + +version 2016.10.30 + +Extractors +* [facebook] Improve 1080P video detection (#11073) +* [imgur] Recognize /r/ URLs (#11071) +* [beeg] Fix extraction (#11069) +* [openload] Fix extraction (#10408) +* [gvsearch] Modernize and fix search request (#11051) +* [adultswim] Fix extraction (#10979) ++ [nobelprize] Add support for nobelprize.org (#9999) +* [hornbunny] Fix extraction (#10981) +* [tvp] Improve video id extraction (#10585) + + +version 2016.10.26 + +Extractors ++ [rentv] Add support for ren.tv (#10620) ++ [ard] Detect unavailable videos (#11018) +* [vk] Fix extraction (#11022) + + +version 2016.10.25 + +Core +* Running youtube-dl in the background is fixed (#10996, #10706, #955) + +Extractors ++ [jamendo] Add support for jamendo.com (#10132, #10736) ++ [pandatv] Add support for panda.tv (#10736) ++ [dotsub] Support Vimeo embed (#10964) +* [litv] Fix extraction ++ [vimeo] Delegate ondemand redirects to ondemand extractor (#10994) +* [vivo] Fix extraction (#11003) ++ [twitch:stream] Add support for rebroadcasts (#10995) +* [pluralsight] Fix subtitles conversion (#10990) + + +version 2016.10.21.1 + +Extractors ++ [pluralsight] Process all clip URLs (#10984) + + +version 2016.10.21 + +Core +- Disable thumbnails embedding in mkv ++ Add support for Comcast multiple-system operator (#10819) + +Extractors +* [pluralsight] Adapt to new API (#10972) +* [openload] Fix extraction (#10408, #10971) ++ [natgeo] Extract m3u8 formats (#10959) + + 
+version 2016.10.19 + +Core ++ [utils] Expose PACKED_CODES_RE ++ [extractor/common] Extract non smil wowza mpd manifests ++ [extractor/common] Detect f4m audio-only formats + +Extractors +* [vidzi] Fix extraction (#10908, #10952) +* [urplay] Fix subtitles extraction ++ [urplay] Add support for urskola.se (#10915) ++ [orf] Add subtitles support (#10939) +* [youtube] Fix --no-playlist behavior for youtu.be/id URLs (#10896) +* [nrk] Relax URL regular expression (#10928) ++ [nytimes] Add support for podcasts (#10926) +* [pluralsight] Relax URL regular expression (#10941) + + +version 2016.10.16 + +Core +* [postprocessor/ffmpeg] Return correct filepath and ext in updated information + in FFmpegExtractAudioPP (#10879) + +Extractors ++ [ruutu] Add support for supla.fi (#10849) ++ [theoperaplatform] Add support for theoperaplatform.eu (#10914) +* [lynda] Fix height for prioritized streams ++ [lynda] Add fallback extraction scenario +* [lynda] Switch to https (#10916) ++ [huajiao] New extractor (#10917) +* [cmt] Fix mgid extraction (#10813) ++ [safari:course] Add support for techbus.safaribooksonline.com +* [orf:tvthek] Fix extraction and modernize (#10898) +* [chirbit] Fix extraction of user profile pages +* [carambatv] Fix extraction +* [canalplus] Fix extraction for some videos +* [cbsinteractive] Fix extraction for cnet.com +* [parliamentliveuk] Lower case URLs are now recognized (#10912) + + +version 2016.10.12 + +Core ++ Support HTML media elements without child nodes +* [Makefile] Support for GNU make < 4 is fixed; BSD make dropped (#9387) + +Extractors +* [dailymotion] Fix extraction (#10901) +* [vimeo:review] Fix extraction (#10900) +* [nhl] Correctly handle invalid formats (#10713) +* [footyroom] Fix extraction (#10810) +* [abc.net.au:iview] Fix for standalone (non series) videos (#10895) ++ [hbo] Add support for episode pages (#10892) +* [allocine] Fix extraction (#10860) ++ [nextmedia] Recognize action news on AppleDaily +* [lego] Improve info extraction and bypass geo restriction (#10872) + + +version 2016.10.07 + +Extractors ++ [iprima] Detect geo restriction +* [facebook] Fix video extraction (#10846) ++ [commonprotocols] Support direct MMS links (#10838) ++ [generic] Add support for multiple vimeo embeds (#10862) ++ [nzz] Add support for nzz.ch (#4407) ++ [npo] Detect geo restriction ++ [npo] Add support for 2doc.nl (#10842) ++ [lego] Add support for lego.com (#10369) ++ [tonline] Add support for t-online.de (#10376) +* [techtalks] Relax URL regular expression (#10840) +* [youtube:live] Extend URL regular expression (#10839) ++ [theweatherchannel] Add support for weather.com (#7188) ++ [thisoldhouse] Add support for thisoldhouse.com (#10837) ++ [nhl] Add support for wch2016.com (#10833) +* [pornoxo] Use JWPlatform to improve metadata extraction + + +version 2016.10.02 + +Core +* Fix possibly lost extended attributes during post-processing ++ Support pyxattr as well as python-xattr for --xattrs and + --xattr-set-filesize (#9054) + +Extractors ++ [jwplatform] Support DASH streams in JWPlayer ++ [jwplatform] Support old-style JWPlayer playlists ++ [byutv:event] Add extractor +* [periscope:user] Fix extraction (#10820) +* [dctp] Fix extraction (#10734) ++ [instagram] Extract video dimensions (#10790) ++ [tvland] Extend URL regular expression (#10812) ++ [vgtv] Add support for tv.aftonbladet.se (#10800) +- [aftonbladet] Remove extractor +* [vk] Fix timestamp and view count extraction (#10760) ++ [vk] Add support for running and finished live streams (#10799) ++ [leeco] Recognize more Le 
Sports URLs (#10794) ++ [instagram] Extract comments (#10788) ++ [ketnet] Extract mzsource formats (#10770) +* [limelight:media] Improve HTTP formats extraction + + +version 2016.09.27 + +Core ++ Add hdcore query parameter to akamai f4m formats ++ Delegate HLS live streams downloading to ffmpeg ++ Improved support for HTML5 subtitles + +Extractors ++ [vk] Add support for dailymotion embeds (#10661) +* [promptfile] Fix extraction (#10634) +* [kaltura] Speed up embed regular expressions (#10764) ++ [npo] Add support for anderetijden.nl (#10754) ++ [prosiebensat1] Add support for advopedia sites +* [mwave] Relax URL regular expression (#10735, #10748) +* [prosiebensat1] Fix playlist support (#10745) ++ [prosiebensat1] Add support for sat1gold sites (#10745) ++ [cbsnews:livevideo] Fix extraction and extract m3u8 formats ++ [brightcove:new] Add support for live streams +* [soundcloud] Generalize playlist entries extraction (#10733) ++ [mtv] Add support for new URL schema (#8169, #9808) +* [einthusan] Fix extraction (#10714) ++ [twitter] Support Periscope embeds (#10737) ++ [openload] Support subtitles (#10625) + + +version 2016.09.24 + +Core ++ Add support for watchTVeverywhere.com authentication provider based MSOs for + Adobe Pass authentication (#10709) + +Extractors ++ [soundcloud:playlist] Provide video id for early playlist entries (#10733) ++ [prosiebensat1] Add support for kabeleinsdoku (#10732) +* [cbs] Extract info from thunder videoPlayerService (#10728) +* [openload] Fix extraction (#10408) ++ [ustream] Support the new HLS streams (#10698) ++ [ooyala] Extract all HLS formats ++ [cartoonnetwork] Add support for Adobe Pass authentication ++ [soundcloud] Extract license metadata ++ [fox] Add support for Adobe Pass authentication (#8584) ++ [tbs] Add support for Adobe Pass authentication (#10642, #10222) ++ [trutv] Add support for Adobe Pass authentication (#10519) ++ [turner] Add support for Adobe Pass authentication + + +version 2016.09.19 + +Extractors ++ [crunchyroll] Check if already authenticated (#10700) +- [twitch:stream] Remove fallback to profile extraction when stream is offline +* [thisav] Improve title extraction (#10682) +* [vyborymos] Improve station info extraction + + +version 2016.09.18 + +Core ++ Introduce manifest_url and fragments fields in formats dictionary for + fragmented media ++ Provide manifest_url field for DASH segments, HLS and HDS ++ Provide fragments field for DASH segments +* Rework DASH segments downloader to use fragments field ++ Add helper method for Wowza Streaming Engine formats extraction + +Extractors ++ [vyborymos] Add extractor for vybory.mos.ru (#10692) ++ [xfileshare] Add title regular expression for streamin.to (#10646) ++ [globo:article] Add support for multiple videos (#10653) ++ [thisav] Recognize HTML5 videos (#10447) +* [jwplatform] Improve JWPlayer detection ++ [mangomolo] Add support for Mangomolo embeds ++ [toutv] Add support for authentication (#10669) +* [franceinter] Fix upload date extraction +* [tv4] Fix HLS and HDS formats extraction (#10659) + + +version 2016.09.15 + +Core +* Improve _hidden_inputs ++ Introduce improved explicit Adobe Pass support ++ Add --ap-mso to provide multiple-system operator identifier ++ Add --ap-username to provide MSO account username ++ Add --ap-password to provide MSO account password ++ Add --ap-list-mso to list all supported MSOs ++ Add support for Rogers Cable multiple-system operator (#10606) + +Extractors +* [crunchyroll] Fix authentication (#10655) +* [twitch] Fix API calls (#10654, #10660) 
++ [bellmedia] Add support for more Bell Media Television sites +* [franceinter] Fix extraction (#10538, #2105) +* [kuwo] Improve error detection (#10650) ++ [go] Add support for free full episodes (#10439) +* [bilibili] Fix extraction for specific videos (#10647) +* [nhk] Fix extraction (#10633) +* [kaltura] Improve audio detection +* [kaltura] Skip chun format ++ [vimeo:ondemand] Pass Referer along with embed URL (#10624) ++ [nbc] Add support for NBC Olympics (#10361) + + +version 2016.09.11.1 + +Extractors ++ [tube8] Extract categories and tags (#10579) ++ [pornhub] Extract categories and tags (#10499) +* [openload] Temporary fix (#10408) ++ [foxnews] Add support for Fox News articles (#10598) +* [viafree] Improve video id extraction (#10615) +* [iwara] Fix extraction after relaunch (#10462, #3215) ++ [tfo] Add extractor for tfo.org +* [lrt] Fix audio extraction (#10566) +* [9now] Fix extraction (#10561) ++ [canalplus] Add support for c8.fr (#10577) +* [newgrounds] Fix uploader extraction (#10584) ++ [polskieradio:category] Add support for category lists (#10576) ++ [ketnet] Add extractor for ketnet.be (#10343) ++ [canvas] Add support for een.be (#10605) ++ [telequebec] Add extractor for telequebec.tv (#1999) +* [parliamentliveuk] Fix extraction (#9137) + + +version 2016.09.08 + +Extractors ++ [jwplatform] Extract height from format label ++ [yahoo] Extract Brightcove Legacy Studio embeds (#9345) +* [videomore] Fix extraction (#10592) +* [foxgay] Fix extraction (#10480) ++ [rmcdecouverte] Add extractor for rmcdecouverte.bfmtv.com (#9709) +* [gamestar] Fix metadata extraction (#10479) +* [puls4] Fix extraction (#10583) ++ [cctv] Add extractor for CCTV and CNTV (#8153) ++ [lci] Add extractor for lci.fr (#10573) ++ [wat] Extract DASH formats ++ [viafree] Improve video id detection (#10569) ++ [trutv] Add extractor for trutv.com (#10519) ++ [nick] Add support for nickelodeon.nl (#10559) ++ [abcotvs:clips] Add support for clips.abcotvs.com ++ [abcotvs] Add support for ABC Owned Television Stations sites (#9551) ++ [miaopai] Add extractor for miaopai.com (#10556) ++ [bilibili] Add support for episodes (#10190) ++ [tvnoe] Add extractor for tvnoe.cz (#10524) + + +version 2016.09.04.1 + +Core +* In DASH downloader if the first segment fails, abort the whole download + process to prevent throttling (#10497) ++ Add support for --skip-unavailable-fragments and --fragment-retries in + hlsnative downloader (#10165, #10448).
++ Add support for --skip-unavailable-fragments in DASH downloader ++ Introduce --skip-unavailable-fragments option for fragment-based downloaders + that allows skipping fragments unavailable due to an HTTP error +* Fix extraction of video/audio entries with src attribute in + _parse_html5_media_entries (#10540) + +Extractors +* [theplatform] Relax URL regular expression (#10546) +* [youtube:playlist] Extend URL regular expression +* [rottentomatoes] Delegate extraction to internetvideoarchive extractor +* [internetvideoarchive] Extract all formats +* [pornvoisines] Fix extraction (#10469) +* [rottentomatoes] Fix extraction (#10467) +* [espn] Extend URL regular expression (#10549) +* [vimple] Extend URL regular expression (#10547) +* [youtube:watchlater] Fix extraction (#10544) +* [youjizz] Fix extraction (#10437) ++ [foxnews] Add support for FoxNews Insider (#10445) ++ [fc2] Recognize Flash player URLs (#10512) + + +version 2016.09.03 + +Core +* Restore usage of NAME attribute from EXT-X-MEDIA tag for format codes in + _extract_m3u8_formats (#10522) +* Handle semicolon in mimetype2ext + +Extractors ++ [youtube] Add support for rental videos' previews (#10532) +* [youtube:playlist] Fallback to video extraction for video/playlist URLs when + no playlist is actually served (#10537) ++ [drtv] Add support for dr.dk/nyheder (#10536) ++ [facebook:plugins:video] Add extractor (#10530) ++ [go] Add extractor for *.go.com sites +* [adobepass] Check for authz_token expiration (#10527) +* [nytimes] Improve extraction +* [thestar] Fix extraction (#10465) +* [glide] Fix extraction (#10478) +- [exfm] Remove extractor (#10482) +* [youporn] Fix categories and tags extraction (#10521) ++ [curiositystream] Add extractor for app.curiositystream.com +- [thvideo] Remove extractor (#10464) +* [movingimage] Fix for the new site name (#10466) ++ [cbs] Add support for once formats (#10515) +* [limelight] Skip ism snd duplicate manifests ++ [porncom] Extract categories and tags (#10510) ++ [facebook] Extract timestamp (#10508) ++ [yahoo] Extract more formats + + +version 2016.08.31 + +Extractors +* [soundcloud] Fix URL regular expression to avoid clashes with sets (#10505) +* [bandcamp:album] Fix title extraction (#10455) +* [pyvideo] Fix extraction (#10468) ++ [ctv] Add support for tsn.ca, bnn.ca and thecomedynetwork.ca (#10016) +* [9c9media] Extract more metadata +* [9c9media] Fix multiple stacks extraction (#10016) +* [adultswim] Improve video info extraction (#10492) +* [vodplatform] Improve embed regular expression +- [played] Remove extractor (#10470) ++ [tbs] Add extractor for tbs.com and tntdrama.com (#10222) ++ [cartoonnetwork] Add extractor for cartoonnetwork.com (#10110) +* [adultswim] Rework in terms of turner extractor +* [cnn] Rework in terms of turner extractor +* [nba] Rework in terms of turner extractor ++ [turner] Add base extractor for Turner Broadcasting System based sites +* [bilibili] Fix extraction (#10375) +* [openload] Fix extraction (#10408) + + +version 2016.08.28 + +Core ++ Add warning message that ffmpeg doesn't support SOCKS +* Improve thumbnail sorting ++ Extract formats from #EXT-X-MEDIA tags in _extract_m3u8_formats +* Fill IV with leading zeros for IVs shorter than 16 octets in hlsnative ++ Add ac-3 to the list of audio codecs in parse_codecs + +Extractors +* [periscope:user] Fix extraction (#10453) +* [douyutv] Fix extraction (#10153, #10318, #10444) ++ [nhk:vod] Add extractor for www3.nhk.or.jp on demand (#4437, #10424) +- [trutube] Remove extractor (#10438) ++ [usanetwork] Add
extractor for usanetwork.com +* [crackle] Fix extraction (#10333) +* [spankbang] Fix description and uploader extraction (#10339) +* [discoverygo] Detect cable provider restricted videos (#10425) ++ [cbc] Add support for watch.cbc.ca +* [kickstarter] Silence the warning for og:description (#10415) +* [mtvservices:embedded] Fix extraction for the new 'edge' player (#10363) + + +version 2016.08.24.1 + +Extractors ++ [pluralsight] Add support for subtitles (#9681) + + +version 2016.08.24 + +Extractors +* [youtube] Fix authentication (#10392) +* [openload] Fix extraction (#10408) ++ [bravotv] Add support for Adobe Pass (#10407) +* [bravotv] Fix clip info extraction (#10407) +* [eagleplatform] Improve embedded videos detection (#10409) +* [awaan] Fix extraction +* [mtvservices:embedded] Update config URL ++ [abc:iview] Add extractor (#6148) + + +version 2016.08.22 + +Core +* Improve automatic calculation of format and subtitle extensions ++ Recognize full unit names in parse_filesize ++ Add support for m3u8 manifests in HTML5 multimedia tags +* Fix octal/hexadecimal number detection in js_to_json + +Extractors ++ [ivi] Add support for 720p and 1080p ++ [charlierose] Add new extractor (#10382) +* [1tv] Fix extraction (#9249) +* [twitch] Renew authentication +* [kaltura] Improve subtitles extension calculation ++ [zingmp3] Add support for video clips +* [zingmp3] Fix extraction (#10041) +* [kaltura] Improve subtitles extraction (#10279) +* [cultureunplugged] Fix extraction (#10330) ++ [cnn] Add support for money.cnn.com (#2797) +* [cbsnews] Fix extraction (#10362) +* [cbs] Fix extraction (#10393) ++ [litv] Support 'promo' URLs (#10385) +* [snotr] Fix extraction (#10338) +* [n-tv.de] Fix extraction (#10331) +* [globo:article] Relax URL and video id regular expressions (#10379) + + +version 2016.08.19 + +Core +- Remove output template description from --help +* Recognize lowercase units in parse_filesize + +Extractors ++ [porncom] Add extractor for porn.com (#2251, #10251) ++ [generic] Add support for DBTV embeds +* [vk:wallpost] Fix audio extraction for new site layout +* [vk] Fix authentication ++ [hgtvcom:show] Add extractor for hgtv.com shows (#10365) ++ [discoverygo] Add support for more GO network sites + + +version 2016.08.17 + +Core ++ Add _get_netrc_login_info + +Extractors +* [mofosex] Extract all formats (#10335) ++ [generic] Add support for vbox7 embeds ++ [vbox7] Add support for embed URLs ++ [viafree] Add extractor (#10358) ++ [mtg] Add support for viafree URLs (#10358) +* [theplatform] Extract all subtitles per language ++ [xvideos] Fix HLS extraction (#10356) ++ [amcnetworks] Add extractor ++ [bbc:playlist] Add support for pagination (#10349) ++ [fxnetworks] Add extractor (#9462) +* [cbslocal] Fix extraction for SendtoNews-based videos +* [sendtonews] Fix extraction +* [jwplatform] Extract video id from JWPlayer data +- [zippcast] Remove extractor (#10332) ++ [viceland] Add extractor (#8799) ++ [adobepass] Add base extractor for Adobe Pass Authentication +* [life:embed] Improve extraction +* [vgtv] Detect geo restricted videos (#10348) ++ [uplynk] Add extractor +* [xiami] Fix extraction (#10342) + + +version 2016.08.13 + +Core +* Show progress for curl external downloader +* Forward more options to curl external downloader + +Extractors +* [pbs] Fix description extraction +* [franceculture] Fix extraction (#10324) +* [pornotube] Fix extraction (#10322) +* [4tube] Fix metadata extraction (#10321) +* [imgur] Fix width and height extraction (#10325) +* [expotv] Improve extraction ++ [vbox7]
Fix extraction (#10309) +- [tapely] Remove extractor (#10323) +* [muenchentv] Fix extraction (#10313) ++ [24video] Add support for .me and .xxx TLDs +* [24video] Fix comment count extraction +* [sunporno] Add support for embed URLs +* [sunporno] Fix metadata extraction (#10316) ++ [hgtv] Add extractor for hgtv.ca (#3999) +- [pbs] Remove request to unavailable API ++ [pbs] Add support for high quality HTTP formats ++ [crunchyroll] Add support for HLS formats (#10301) + + +version 2016.08.12 + +Core +* Subtitles are now written as is. Newline conversions are disabled. (#10268) ++ Recognize more formats in unified_timestamp + +Extractors +- [goldenmoustache] Remove extractor (#10298) +* [drtuber] Improve title extraction +* [drtuber] Make dislike count optional (#10297) +* [chirbit] Fix extraction (#10296) +* [francetvinfo] Relax URL regular expression +* [rtlnl] Relax URL regular expression (#10282) +* [formula1] Relax URL regular expression (#10283) +* [wat] Improve extraction (#10281) +* [ctsnews] Fix extraction + + +version 2016.08.10 + +Core +* Make --metadata-from-title non fatal when title does not match the pattern +* Introduce options for randomized sleep before each download + --min-sleep-interval and --max-sleep-interval (#9930) +* Respect default in _search_json_ld + +Extractors ++ [uol] Add extractor for uol.com.br (#4263) +* [rbmaradio] Fix extraction and extract all formats (#10242) ++ [sonyliv] Add extractor for sonyliv.com (#10258) +* [aparat] Fix extraction +* [cwtv] Extract HTTP formats ++ [rozhlas] Add extractor for prehravac.rozhlas.cz (#10253) +* [kuwo:singer] Fix extraction + + +version 2016.08.07 + +Core ++ Add support for TV Parental Guidelines ratings in parse_age_limit ++ Add decode_png (#9706) ++ Add support for partOfTVSeries in JSON-LD +* Lower master M3U8 manifest preference for better format sorting + +Extractors ++ [discoverygo] Add extractor (#10245) +* [flipagram] Make JSON-LD extraction non fatal +* [generic] Make JSON-LD extraction non fatal ++ [bbc] Add support for morph embeds (#10239) +* [tnaflixnetworkbase] Improve title extraction +* [tnaflix] Fix metadata extraction (#10249) +* [fox] Fix theplatform release URL query +* [openload] Fix extraction (#9706) +* [bbc] Skip duplicate manifest URLs +* [bbc] Improve format code ++ [bbc] Add support for DASH and F4M +* [bbc] Improve format sorting and listing +* [bbc] Improve playlist extraction ++ [pokemon] Add extractor (#10093) ++ [condenast] Add fallback scenario for video info extraction + + +version 2016.08.06 + +Core +* Add support for JSON-LD root list entries (#10203) +* Improve unified_timestamp +* Lower preference of RTSP formats in generic sorting ++ Add support for multiple properties in _og_search_property +* Improve password hiding from verbose output + +Extractors ++ [adultswim] Add support for trailers (#10235) +* [archiveorg] Improve extraction (#10219) ++ [jwplatform] Add support for playlists ++ [jwplatform] Add support for relative URLs +* [jwplatform] Improve audio detection ++ [tvplay] Capture and output native error message ++ [tvplay] Extract series metadata ++ [tvplay] Add support for subtitles (#10194) +* [tvp] Improve extraction (#7799) +* [cbslocal] Fix timestamp parsing (#10213) ++ [naver] Add support for subtitles (#8096) +* [naver] Improve extraction +* [condenast] Improve extraction +* [engadget] Relax URL regular expression +* [5min] Fix extraction ++ [nationalgeographic] Add support for Episode Guide ++ [kaltura] Add support for subtitles +* [kaltura] Optimize network 
requests ++ [vodplatform] Add extractor for vod-platform.net +- [gamekings] Remove extractor +* [limelight] Extract HTTP formats +* [ntvru] Fix extraction ++ [comedycentral] Re-add :tds and :thedailyshow shortnames + + +version 2016.08.01 + +Fixed/improved extractors +- [yandexmusic:track] Adapt to changes in track location JSON (#10193) +- [bloomberg] Support another form of player (#10187) +- [limelight] Skip DRM protected videos +- [safari] Relax regular expressions for URL matching (#10202) +- [cwtv] Add support for cwtvpr.com (#10196) + + +version 2016.07.30 + +Fixed/improved extractors +- [twitch:clips] Sort formats +- [tv2] Use m3u8_native +- [tv2:article] Fix video detection (#10188) +- rtve (#10076) +- [dailymotion:playlist] Optimize download archive processing (#10180) + + +version 2016.07.28 + +Fixed/improved extractors +- shared (#10170) +- soundcloud (#10179) +- twitch (#9767) + + +version 2016.07.26.2 + +Fixed/improved extractors +- smotri +- camdemy +- mtv +- comedycentral +- cmt +- cbc +- mgtv +- orf + + +version 2016.07.24 + +New extractors +- arkena (#8682) +- lcp (#8682) + +Fixed/improved extractors +- facebook (#10151) +- dailymail +- telegraaf +- dcn +- onet +- tvp + +Miscellaneous +- Support $Time$ in DASH manifests + + +version 2016.07.22 + +New extractors +- odatv (#9285) + +Fixed/improved extractors +- bbc +- youjizz (#10131) +- youtube (#10140) +- pornhub (#10138) +- eporner (#10139) + + +version 2016.07.17 + +New extractors +- nintendo (#9986) +- streamable (#9122) + +Fixed/improved extractors +- ard (#10095) +- mtv +- comedycentral (#10101) +- viki (#10098) +- spike (#10106) + +Miscellaneous +- Improved twitter player detection (#10090) + + +version 2016.07.16 + +New extractors +- ninenow (#5181) + +Fixed/improved extractors +- rtve (#10076) +- brightcove +- 3qsdn +- syfy (#9087, #3820, #2388) +- youtube (#10083) + +Miscellaneous +- Fix subtitle embedding for video-only and audio-only files (#10081) + + +version 2016.07.13 + +New extractors +- rudo + +Fixed/improved extractors +- biobiochiletv +- tvplay +- dbtv +- brightcove +- tmz +- youtube (#10059) +- shahid (#10062) +- vk +- ellentv (#10067) + + +version 2016.07.11 + +New extractors +- roosterteeth (#9864) + +Fixed/improved extractors +- miomio (#9605) +- vuclip +- youtube +- vidzi (#10058) + + +version 2016.07.09.2 + +Fixed/improved extractors +- vimeo (#1638) +- facebook (#10048) +- lynda (#10047) +- animeondemand + +Fixed/improved features +- Embedding subtitles no longer throws an error with problematic inputs (#9063) + + +version 2016.07.09.1 + +Fixed/improved extractors +- youtube +- ard +- srmediatek (#9373) + + +version 2016.07.09 + +New extractors +- Flipagram (#9898) + +Fixed/improved extractors +- telecinco +- toutv +- radiocanada +- tweakers (#9516) +- lynda +- nick (#7542) +- polskieradio (#10028) +- le +- facebook (#9851) +- mgtv +- animeondemand (#10031) + +Fixed/improved features +- `--postprocessor-args` and `--downloader-args` now accept non-ASCII inputs + on non-Windows systems + + +version 2016.07.07 + +New extractors +- kamcord (#10001) + +Fixed/improved extractors +- spiegel (#10018) +- metacafe (#8539, #3253) +- onet (#9950) +- francetv (#9955) +- brightcove (#9965) +- daum (#9972) + + +version 2016.07.06 + +Fixed/improved extractors +- youtube (#10007, #10009) +- xuite +- stitcher +- spiegel +- slideshare +- sandia +- rtvnh +- prosiebensat1 +- onionstudios + + +version 2016.07.05 + +Fixed/improved extractors +- brightcove +- yahoo (#9995) +- pornhub (#9997) +- iqiyi +- kaltura
(#5557) +- la7 + +Changed features +- Rename --cn-verification-proxy to --geo-verification-proxy + +Miscellaneous +- Add script for displaying downloads statistics + + +version 2016.07.03.1 + +Fixed/improved extractors +- theplatform +- aenetworks +- nationalgeographic +- hrti (#9482) +- facebook (#5701) +- buzzfeed (#5701) +- rai (#8617, #9157, #9232, #8552, #8551) +- nationalgeographic (#9991) +- iqiyi + + +version 2016.07.03 + +New extractors +- hrti (#9482) + +Fixed/improved extractors +- vk (#9981) +- facebook (#9938) +- xtube (#9953, #9961) + + +version 2016.07.02 + +New extractors +- fusion (#9958) + +Fixed/improved extractors +- twitch (#9975) +- vine (#9970) +- periscope (#9967) +- pornhub (#8696) + + +version 2016.07.01 + +New extractors +- 9c9media +- ctvnews (#2156) +- ctv (#4077) + +Fixed/improved extractors +- rds +- meta (#8789) +- pornhub (#9964) +- sixplay (#2183) + +New features +- Accept quoted strings across multiple lines (#9940) diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..68a49daad --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <http://unlicense.org/> diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 000000000..4e43e99f3 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,9 @@ +include README.md +include LICENSE +include AUTHORS +include ChangeLog +include youtube-dl.bash-completion +include youtube-dl.fish +include youtube-dl.1 +recursive-include docs Makefile conf.py *.rst +recursive-include test * diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..3e17365b8 --- /dev/null +++ b/Makefile @@ -0,0 +1,135 @@ +all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites + +clean: +	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe +	find . -name "*.pyc" -delete +	find .
-name "*.class" -delete + +PREFIX ?= /usr/local +BINDIR ?= $(PREFIX)/bin +MANDIR ?= $(PREFIX)/man +SHAREDIR ?= $(PREFIX)/share +PYTHON ?= /usr/bin/env python + +# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local +SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) + +# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 +MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) + +install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish + install -d $(DESTDIR)$(BINDIR) + install -m 755 youtube-dl $(DESTDIR)$(BINDIR) + install -d $(DESTDIR)$(MANDIR)/man1 + install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1 + install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d + install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl + install -d $(DESTDIR)$(SHAREDIR)/zsh/site-functions + install -m 644 youtube-dl.zsh $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_youtube-dl + install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions + install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish + +codetest: + flake8 . + +test: + #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test + nosetests --verbose test + $(MAKE) codetest + +ot: offlinetest + +# Keep this list in sync with devscripts/run_tests.sh +offlinetest: codetest + $(PYTHON) -m nose --verbose test \ + --exclude test_age_restriction.py \ + --exclude test_download.py \ + --exclude test_iqiyi_sdk_interpreter.py \ + --exclude test_socks.py \ + --exclude test_subtitles.py \ + --exclude test_write_annotations.py \ + --exclude test_youtube_lists.py \ + --exclude test_youtube_signature.py + +tar: youtube-dl.tar.gz + +.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest supportedsites + +pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish + +youtube-dl: youtube_dl/*.py youtube_dl/*/*.py + mkdir -p zip + for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \ + mkdir -p zip/$$d ;\ + cp -pPR $$d/*.py zip/$$d/ ;\ + done + touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py + mv zip/youtube_dl/__main__.py zip/ + cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py + rm -rf zip + echo '#!$(PYTHON)' > youtube-dl + cat youtube-dl.zip >> youtube-dl + rm youtube-dl.zip + chmod a+x youtube-dl + +README.md: youtube_dl/*.py youtube_dl/*/*.py + COLUMNS=80 $(PYTHON) youtube_dl/__main__.py --help | $(PYTHON) devscripts/make_readme.py + +CONTRIBUTING.md: README.md + $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md + +issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md youtube_dl/version.py + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE/1_broken_site.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE/2_site_support_request.md + $(PYTHON) devscripts/make_issue_template.py 
.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE/4_bug_report.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md .github/ISSUE_TEMPLATE/5_feature_request.md + +supportedsites: + $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md + +README.txt: README.md + pandoc -f $(MARKDOWN) -t plain README.md -o README.txt + +youtube-dl.1: README.md + $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md + pandoc -s -f $(MARKDOWN) -t man youtube-dl.1.temp.md -o youtube-dl.1 + rm -f youtube-dl.1.temp.md + +youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in + $(PYTHON) devscripts/bash-completion.py + +bash-completion: youtube-dl.bash-completion + +youtube-dl.zsh: youtube_dl/*.py youtube_dl/*/*.py devscripts/zsh-completion.in + $(PYTHON) devscripts/zsh-completion.py + +zsh-completion: youtube-dl.zsh + +youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in + $(PYTHON) devscripts/fish-completion.py + +fish-completion: youtube-dl.fish + +lazy-extractors: youtube_dl/extractor/lazy_extractors.py + +_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py') +youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES) + $(PYTHON) devscripts/make_lazy_extractors.py $@ + +youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS + @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \ + --exclude '*.DS_Store' \ + --exclude '*.kate-swp' \ + --exclude '*.pyc' \ + --exclude '*.pyo' \ + --exclude '*~' \ + --exclude '__pycache__' \ + --exclude '.git' \ + --exclude 'docs/_build' \ + -- \ + bin devscripts test youtube_dl docs \ + ChangeLog AUTHORS LICENSE README.md README.txt \ + Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \ + youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \ + youtube-dl diff --git a/README.md b/README.md new file mode 100644 index 000000000..8c48a3012 --- /dev/null +++ b/README.md @@ -0,0 +1,1378 @@ +[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl) + +youtube-dl - download videos from youtube.com or other video platforms + +- [INSTALLATION](#installation) +- [DESCRIPTION](#description) +- [OPTIONS](#options) +- [CONFIGURATION](#configuration) +- [OUTPUT TEMPLATE](#output-template) +- [FORMAT SELECTION](#format-selection) +- [VIDEO SELECTION](#video-selection) +- [FAQ](#faq) +- [DEVELOPER INSTRUCTIONS](#developer-instructions) +- [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl) +- [BUGS](#bugs) +- [COPYRIGHT](#copyright) + +# INSTALLATION + +To install it right away for all UNIX users (Linux, macOS, etc.), type: + + sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl + sudo chmod a+rx /usr/local/bin/youtube-dl + +If you do not have curl, you can alternatively use a recent wget: + + sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl + sudo chmod a+rx /usr/local/bin/youtube-dl + +Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their 
[PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`). + +You can also use pip: + + sudo -H pip install --upgrade youtube-dl + +This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information. + +macOS users can install youtube-dl with [Homebrew](https://brew.sh/): + + brew install youtube-dl + +Or with [MacPorts](https://www.macports.org/): + + sudo port install youtube-dl + +Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html). + +# DESCRIPTION +**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like. + + youtube-dl [OPTIONS] URL [URL...] + +# OPTIONS + -h, --help Print this help text and exit + --version Print program version and exit + -U, --update Update this program to latest version. Make + sure that you have sufficient permissions + (run with sudo if needed) + -i, --ignore-errors Continue on download errors, for example to + skip unavailable videos in a playlist + --abort-on-error Abort downloading of further videos (in the + playlist or the command line) if an error + occurs + --dump-user-agent Display the current browser identification + --list-extractors List all supported extractors + --extractor-descriptions Output descriptions of all supported + extractors + --force-generic-extractor Force extraction to use the generic + extractor + --default-search PREFIX Use this prefix for unqualified URLs. For + example "gvsearch2:" downloads two videos + from google videos for youtube-dl "large + apple". Use the value "auto" to let + youtube-dl guess ("auto_warning" to emit a + warning when guessing). "error" just throws + an error. The default value "fixup_error" + repairs broken URLs, but emits an error if + this is not possible instead of searching. + --ignore-config Do not read configuration files. When given + in the global configuration file + /etc/youtube-dl.conf: Do not read the user + configuration in ~/.config/youtube- + dl/config (%APPDATA%/youtube-dl/config.txt + on Windows) + --config-location PATH Location of the configuration file; either + the path to the config or its containing + directory. + --flat-playlist Do not extract the videos of a playlist, + only list them. + --mark-watched Mark videos watched (YouTube only) + --no-mark-watched Do not mark videos watched (YouTube only) + --no-color Do not emit color codes in output + +## Network Options: + --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. + To enable SOCKS proxy, specify a proper + scheme. For example + socks5://127.0.0.1:1080/. 
Pass in an empty + string (--proxy "") for direct connection + --socket-timeout SECONDS Time to wait before giving up, in seconds + --source-address IP Client-side IP address to bind to + -4, --force-ipv4 Make all connections via IPv4 + -6, --force-ipv6 Make all connections via IPv6 + +## Geo Restriction: + --geo-verification-proxy URL Use this proxy to verify the IP address for + some geo-restricted sites. The default + proxy specified by --proxy (or none, if the + option is not present) is used for the + actual downloading. + --geo-bypass Bypass geographic restriction via faking + X-Forwarded-For HTTP header + --no-geo-bypass Do not bypass geographic restriction via + faking X-Forwarded-For HTTP header + --geo-bypass-country CODE Force bypass geographic restriction with + explicitly provided two-letter ISO 3166-2 + country code + --geo-bypass-ip-block IP_BLOCK Force bypass geographic restriction with + explicitly provided IP block in CIDR + notation + +## Video Selection: + --playlist-start NUMBER Playlist video to start at (default is 1) + --playlist-end NUMBER Playlist video to end at (default is last) + --playlist-items ITEM_SPEC Playlist video items to download. Specify + indices of the videos in the playlist + separated by commas like: "--playlist-items + 1,2,5,8" if you want to download videos + indexed 1, 2, 5, 8 in the playlist. You can + specify range: "--playlist-items + 1-3,7,10-13", it will download the videos + at index 1, 2, 3, 7, 10, 11, 12 and 13. + --match-title REGEX Download only matching titles (regex or + caseless sub-string) + --reject-title REGEX Skip download for matching titles (regex or + caseless sub-string) + --max-downloads NUMBER Abort after downloading NUMBER files + --min-filesize SIZE Do not download any videos smaller than + SIZE (e.g. 50k or 44.6m) + --max-filesize SIZE Do not download any videos larger than SIZE + (e.g. 50k or 44.6m) + --date DATE Download only videos uploaded in this date + --datebefore DATE Download only videos uploaded on or before + this date (i.e. inclusive) + --dateafter DATE Download only videos uploaded on or after + this date (i.e. inclusive) + --min-views COUNT Do not download any videos with less than + COUNT views + --max-views COUNT Do not download any videos with more than + COUNT views + --match-filter FILTER Generic video filter. Specify any key (see + the "OUTPUT TEMPLATE" for a list of + available keys) to match if the key is + present, !key to check if the key is not + present, key > NUMBER (like "comment_count + > 12", also works with >=, <, <=, !=, =) to + compare against a number, key = 'LITERAL' + (like "uploader = 'Mike Smith'", also works + with !=) to match against a string literal + and & to require multiple matches. Values + which are not known are excluded unless you + put a question mark (?) after the operator. + For example, to only match videos that have + been liked more than 100 times and disliked + less than 50 times (or the dislike + functionality is not available at the given + service), but who also have a description, + use --match-filter "like_count > 100 & + dislike_count <? 50 & description" . + --no-playlist Download only the video, if the URL refers + to a video and a playlist. + --yes-playlist Download the playlist, if the URL refers to + a video and a playlist. + --age-limit YEARS Download only videos suitable for the given + age + --download-archive FILE Download only videos not listed in the + archive file. Record the IDs of all + downloaded videos in it. 
+ --include-ads Download advertisements as well + (experimental) + +## Download Options: + -r, --limit-rate RATE Maximum download rate in bytes per second + (e.g. 50K or 4.2M) + -R, --retries RETRIES Number of retries (default is 10), or + "infinite". + --fragment-retries RETRIES Number of retries for a fragment (default + is 10), or "infinite" (DASH, hlsnative and + ISM) + --skip-unavailable-fragments Skip unavailable fragments (DASH, hlsnative + and ISM) + --abort-on-unavailable-fragment Abort downloading when some fragment is not + available + --keep-fragments Keep downloaded fragments on disk after + downloading is finished; fragments are + erased by default + --buffer-size SIZE Size of download buffer (e.g. 1024 or 16K) + (default is 1024) + --no-resize-buffer Do not automatically adjust the buffer + size. By default, the buffer size is + automatically resized from an initial value + of SIZE. + --http-chunk-size SIZE Size of a chunk for chunk-based HTTP + downloading (e.g. 10485760 or 10M) (default + is disabled). May be useful for bypassing + bandwidth throttling imposed by a webserver + (experimental) + --playlist-reverse Download playlist videos in reverse order + --playlist-random Download playlist videos in random order + --xattr-set-filesize Set file xattribute ytdl.filesize with + expected file size + --hls-prefer-native Use the native HLS downloader instead of + ffmpeg + --hls-prefer-ffmpeg Use ffmpeg instead of the native HLS + downloader + --hls-use-mpegts Use the mpegts container for HLS videos, + allowing to play the video while + downloading (some players may not be able + to play it) + --external-downloader COMMAND Use the specified external downloader. + Currently supports + aria2c,avconv,axel,curl,ffmpeg,httpie,wget + --external-downloader-args ARGS Give these arguments to the external + downloader + +## Filesystem Options: + -a, --batch-file FILE File containing URLs to download ('-' for + stdin), one URL per line. Lines starting + with '#', ';' or ']' are considered as + comments and ignored. + --id Use only video ID in file name + -o, --output TEMPLATE Output filename template, see the "OUTPUT + TEMPLATE" for all the info + --autonumber-start NUMBER Specify the start value for %(autonumber)s + (default is 1) + --restrict-filenames Restrict filenames to only ASCII + characters, and avoid "&" and spaces in + filenames + -w, --no-overwrites Do not overwrite files + -c, --continue Force resume of partially downloaded files. + By default, youtube-dl will resume + downloads if possible. + --no-continue Do not resume partially downloaded files + (restart from beginning) + --no-part Do not use .part files - write directly + into output file + --no-mtime Do not use the Last-modified header to set + the file modification time + --write-description Write video description to a .description + file + --write-info-json Write video metadata to a .info.json file + --write-annotations Write video annotations to a + .annotations.xml file + --load-info-json FILE JSON file containing the video information + (created with the "--write-info-json" + option) + --cookies FILE File to read cookies from and dump cookie + jar in + --cache-dir DIR Location in the filesystem where youtube-dl + can store some downloaded information + permanently. By default + $XDG_CACHE_HOME/youtube-dl or + ~/.cache/youtube-dl . At the moment, only + YouTube player files (for videos with + obfuscated signatures) are cached, but that + may change. 
+ --no-cache-dir Disable filesystem caching + --rm-cache-dir Delete all filesystem cache files + +## Thumbnail images: + --write-thumbnail Write thumbnail image to disk + --write-all-thumbnails Write all thumbnail image formats to disk + --list-thumbnails Simulate and list all available thumbnail + formats + +## Verbosity / Simulation Options: + -q, --quiet Activate quiet mode + --no-warnings Ignore warnings + -s, --simulate Do not download the video and do not write + anything to disk + --skip-download Do not download the video + -g, --get-url Simulate, quiet but print URL + -e, --get-title Simulate, quiet but print title + --get-id Simulate, quiet but print id + --get-thumbnail Simulate, quiet but print thumbnail URL + --get-description Simulate, quiet but print video description + --get-duration Simulate, quiet but print video length + --get-filename Simulate, quiet but print output filename + --get-format Simulate, quiet but print output format + -j, --dump-json Simulate, quiet but print JSON information. + See the "OUTPUT TEMPLATE" for a description + of available keys. + -J, --dump-single-json Simulate, quiet but print JSON information + for each command-line argument. If the URL + refers to a playlist, dump the whole + playlist information in a single line. + --print-json Be quiet and print the video information as + JSON (video is still being downloaded). + --newline Output progress bar as new lines + --no-progress Do not print progress bar + --console-title Display progress in console titlebar + -v, --verbose Print various debugging information + --dump-pages Print downloaded pages encoded using base64 + to debug problems (very verbose) + --write-pages Write downloaded intermediary pages to + files in the current directory to debug + problems + --print-traffic Display sent and read HTTP traffic + -C, --call-home Contact the youtube-dl server for debugging + --no-call-home Do NOT contact the youtube-dl server for + debugging + +## Workarounds: + --encoding ENCODING Force the specified encoding (experimental) + --no-check-certificate Suppress HTTPS certificate validation + --prefer-insecure Use an unencrypted connection to retrieve + information about the video. (Currently + supported only for YouTube) + --user-agent UA Specify a custom user agent + --referer URL Specify a custom referer, use if the video + access is restricted to one domain + --add-header FIELD:VALUE Specify a custom HTTP header and its value, + separated by a colon ':'. You can use this + option multiple times + --bidi-workaround Work around terminals that lack + bidirectional text support. Requires bidiv + or fribidi executable in PATH + --sleep-interval SECONDS Number of seconds to sleep before each + download when used alone or a lower bound + of a range for randomized sleep before each + download (minimum possible number of + seconds to sleep) when used along with + --max-sleep-interval. + --max-sleep-interval SECONDS Upper bound of a range for randomized sleep + before each download (maximum possible + number of seconds to sleep). Must only be + used along with --min-sleep-interval. 
+ +## Video Format Options: + -f, --format FORMAT Video format code, see the "FORMAT + SELECTION" for all the info + --all-formats Download all available video formats + --prefer-free-formats Prefer free video formats unless a specific + one is requested + -F, --list-formats List all available formats of requested + videos + --youtube-skip-dash-manifest Do not download the DASH manifests and + related data on YouTube videos + --merge-output-format FORMAT If a merge is required (e.g. + bestvideo+bestaudio), output to given + container format. One of mkv, mp4, ogg, + webm, flv. Ignored if no merge is required + +## Subtitle Options: + --write-sub Write subtitle file + --write-auto-sub Write automatically generated subtitle file + (YouTube only) + --all-subs Download all the available subtitles of the + video + --list-subs List all available subtitles for the video + --sub-format FORMAT Subtitle format, accepts formats + preference, for example: "srt" or + "ass/srt/best" + --sub-lang LANGS Languages of the subtitles to download + (optional) separated by commas, use --list- + subs for available language tags + +## Authentication Options: + -u, --username USERNAME Login with this account ID + -p, --password PASSWORD Account password. If this option is left + out, youtube-dl will ask interactively. + -2, --twofactor TWOFACTOR Two-factor authentication code + -n, --netrc Use .netrc authentication data + --video-password PASSWORD Video password (vimeo, smotri, youku) + +## Adobe Pass Options: + --ap-mso MSO Adobe Pass multiple-system operator (TV + provider) identifier, use --ap-list-mso for + a list of available MSOs + --ap-username USERNAME Multiple-system operator account login + --ap-password PASSWORD Multiple-system operator account password. + If this option is left out, youtube-dl will + ask interactively. + --ap-list-mso List all supported multiple-system + operators + +## Post-processing Options: + -x, --extract-audio Convert video files to audio-only files + (requires ffmpeg or avconv and ffprobe or + avprobe) + --audio-format FORMAT Specify audio format: "best", "aac", + "flac", "mp3", "m4a", "opus", "vorbis", or + "wav"; "best" by default; No effect without + -x + --audio-quality QUALITY Specify ffmpeg/avconv audio quality, insert + a value between 0 (better) and 9 (worse) + for VBR or a specific bitrate like 128K + (default 5) + --recode-video FORMAT Encode the video to another format if + necessary (currently supported: + mp4|flv|ogg|webm|mkv|avi) + --postprocessor-args ARGS Give these arguments to the postprocessor + -k, --keep-video Keep the video file on disk after the post- + processing; the video is erased by default + --no-post-overwrites Do not overwrite post-processed files; the + post-processed files are overwritten by + default + --embed-subs Embed subtitles in the video (only for mp4, + webm and mkv videos) + --embed-thumbnail Embed thumbnail in the audio as cover art + --add-metadata Write metadata to the video file + --metadata-from-title FORMAT Parse additional metadata like song title / + artist from the video title. The format + syntax is the same as --output. Regular + expression with named capture groups may + also be used. The parsed parameters replace + existing values. Example: --metadata-from- + title "%(artist)s - %(title)s" matches a + title like "Coldplay - Paradise". Example + (regex): --metadata-from-title + "(?P<artist>.+?) 
- (?P<title>.+)" + --xattrs Write metadata to the video file's xattrs + (using dublin core and xdg standards) + --fixup POLICY Automatically correct known faults of the + file. One of never (do nothing), warn (only + emit a warning), detect_or_warn (the + default; fix file if we can, warn + otherwise) + --prefer-avconv Prefer avconv over ffmpeg for running the + postprocessors + --prefer-ffmpeg Prefer ffmpeg over avconv for running the + postprocessors (default) + --ffmpeg-location PATH Location of the ffmpeg/avconv binary; + either the path to the binary or its + containing directory. + --exec CMD Execute a command on the file after + downloading, similar to find's -exec + syntax. Example: --exec 'adb push {} + /sdcard/Music/ && rm {}' + --convert-subs FORMAT Convert the subtitles to other format + (currently supported: srt|ass|vtt|lrc) + +# CONFIGURATION + +You can configure youtube-dl by placing any supported command line option in a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that the configuration file may not exist by default, so you may need to create it yourself. + +For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under the `Movies` directory in your home directory: +``` +# Lines starting with # are comments + +# Always extract audio +-x + +# Do not copy the mtime +--no-mtime + +# Use this proxy +--proxy 127.0.0.1:3128 + +# Save all videos under the Movies directory in your home directory +-o ~/Movies/%(title)s.%(ext)s +``` + +Note that options in the configuration file are just the same options (aka switches) used in regular command line calls, so there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. + +You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run. + +You can also use `--config-location` if you want to use a custom configuration file for a particular youtube-dl run. + +### Authentication with `.netrc` file + +You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and to prevent plain text passwords from being tracked in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` directory and restrict its permissions so that only you can read and write it: +``` +touch $HOME/.netrc +chmod a-rwx,u+rw $HOME/.netrc +``` +After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase: +``` +machine <extractor> login <login> password <password> +``` +For example: +``` +machine youtube login myaccount@gmail.com password my_youtube_password +machine twitch login my_twitch_account_name password my_twitch_password +``` +To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration). + +On Windows you may also need to set up the `%HOME%` environment variable manually.
For example: +``` +set HOME=%USERPROFILE% +``` + +# OUTPUT TEMPLATE + +The `-o` option allows users to indicate a template for the output file names. + +**tl;dr:** [navigate me to examples](#output-template-examples). + +The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names, along with their sequence types, are: + + - `id` (string): Video identifier + - `title` (string): Video title + - `url` (string): Video URL + - `ext` (string): Video filename extension + - `alt_title` (string): A secondary title of the video + - `display_id` (string): An alternative identifier for the video + - `uploader` (string): Full name of the video uploader + - `license` (string): License name the video is licensed under + - `creator` (string): The creator of the video + - `release_date` (string): The date (YYYYMMDD) when the video was released + - `timestamp` (numeric): UNIX timestamp of the moment the video became available + - `upload_date` (string): Video upload date (YYYYMMDD) + - `uploader_id` (string): Nickname or id of the video uploader + - `channel` (string): Full name of the channel the video is uploaded on + - `channel_id` (string): Id of the channel + - `location` (string): Physical location where the video was filmed + - `duration` (numeric): Length of the video in seconds + - `view_count` (numeric): How many users have watched the video on the platform + - `like_count` (numeric): Number of positive ratings of the video + - `dislike_count` (numeric): Number of negative ratings of the video + - `repost_count` (numeric): Number of reposts of the video + - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage + - `comment_count` (numeric): Number of comments on the video + - `age_limit` (numeric): Age restriction for the video (years) + - `is_live` (boolean): Whether this video is a live stream or a fixed-length video + - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL + - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL + - `format` (string): A human-readable description of the format + - `format_id` (string): Format code specified by `--format` + - `format_note` (string): Additional info about the format + - `width` (numeric): Width of the video + - `height` (numeric): Height of the video + - `resolution` (string): Textual description of width and height + - `tbr` (numeric): Average bitrate of audio and video in KBit/s + - `abr` (numeric): Average audio bitrate in KBit/s + - `acodec` (string): Name of the audio codec in use + - `asr` (numeric): Audio sampling rate in Hertz + - `vbr` (numeric): Average video bitrate in KBit/s + - `fps` (numeric): Frame rate + - `vcodec` (string): Name of the video codec in use + - `container` (string): Name of the container format + - `filesize` (numeric): The number of bytes, if known in advance + - `filesize_approx` (numeric): An estimate for the number of bytes + - `protocol` (string): The protocol that will be used for the actual download + - `extractor` (string): Name of the extractor + - `extractor_key` (string): Key name of the extractor + - `epoch` (numeric): Unix epoch when creating the file + - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero + - `playlist` (string): Name or id of the playlist that contains the video + - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist + - `playlist_id` (string): Playlist identifier + - `playlist_title` (string): Playlist title + - `playlist_uploader` (string): Full name of the playlist uploader + - `playlist_uploader_id` (string): Nickname or id of the playlist uploader + +Available for the video that belongs to some logical chapter or section: + + - `chapter` (string): Name or title of the chapter the video belongs to + - `chapter_number` (numeric): Number of the chapter the video belongs to + - `chapter_id` (string): Id of the chapter the video belongs to + +Available for the video that is an episode of some series or programme: + + - `series` (string): Title of the series or programme the video episode belongs to + - `season` (string): Title of the season the video episode belongs to + - `season_number` (numeric): Number of the season the video episode belongs to + - `season_id` (string): Id of the season the video episode belongs to + - `episode` (string): Title of the video episode + - `episode_number` (numeric): Number of the video episode within a season + - `episode_id` (string): Id of the video episode + +Available for the media that is a track or a part of a music album: + + - `track` (string): Title of the track + - `track_number` (numeric): Number of the track within an album or a disc + - `track_id` (string): Id of the track + - `artist` (string): Artist(s) of the track + - `genre` (string): Genre(s) of the track + - `album` (string): Title of the album the track belongs to + - `album_type` (string): Type of the album + - `album_artist` (string): List of all artists that appeared on the album + - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to + - `release_year` (numeric): Year (YYYY) when the album was released + +Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`. + +For example, for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory. + +For numeric sequences you can use numeric-related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. + +Output templates can also contain an arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you. + +To use percent literals in an output template use `%%`. To output to stdout use `-o -`. + +The current default template is `%(title)s-%(id)s.%(ext)s`.
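+ +Because the special sequences follow Python's `%` string formatting, you can preview how a template will expand using plain Python. A minimal sketch (the field values below are made up for illustration): +```python +# Hypothetical info dict; in practice the values come from the extractor +info = { +    'title': 'youtube-dl test video', +    'id': 'BaW_jenozKcj', +    'ext': 'mp4', +    'view_count': 42, +} + +print('%(title)s-%(id)s.%(ext)s' % info)  # youtube-dl test video-BaW_jenozKcj.mp4 +print('%(view_count)05d' % info)          # 00042 +```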
+ +In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title. + +#### Output template and Windows batch files + +If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However, you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`. + +#### Output template examples + +Note that on Windows you may need to use double quotes instead of single. + +```bash +$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc +youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters + +$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames +youtube-dl_test_video_.mp4 # A simple file name + +# Download YouTube playlist videos in a separate directory indexed by video order in the playlist +$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re + +# Download all playlists of YouTube channel/user keeping each playlist in a separate directory +$ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists + +# Download Udemy course keeping each chapter in a separate directory under the MyVideos directory in your home +$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/ + +# Download entire series season keeping each series and each season in a separate directory under C:/MyVideos +$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617 + +# Stream the video being downloaded to stdout +$ youtube-dl -o - BaW_jenozKc +``` + +# FORMAT SELECTION + +By default youtube-dl tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, youtube-dl will guess it for you by **default**. + +But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection* based on which you can explicitly specify the desired format, select formats based on some criterion or criteria, set up precedence and much more. + +The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes the format or formats you would like to download. + +**tl;dr:** [navigate me to examples](#format-selection-examples). + +The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
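+ +The same selector expressions can be used when embedding youtube-dl in a Python program (see "EMBEDDING YOUTUBE-DL" below), where the `format` key of the options dict mirrors `-f`/`--format`. A minimal sketch; format code 22 is only an example here and varies per video: +```python +from __future__ import unicode_literals +import youtube_dl + +# 'format' takes the same selector expressions as -f/--format; +# list the real codes for a video with: youtube-dl -F <url> +ydl_opts = {'format': '22'} +with youtube_dl.YoutubeDL(ydl_opts) as ydl: +    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) +```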
+ +You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. + +You can also use special names to select particular edge case formats: + + - `best`: Select the best quality format represented by a single file with video and audio. + - `worst`: Select the worst quality format represented by a single file with video and audio. + - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available. + - `worstvideo`: Select the worst quality video-only format. May not be available. + - `bestaudio`: Select the best quality audio-only format. May not be available. + - `worstaudio`: Select the worst quality audio-only format. May not be available. + +For example, to download the worst quality video-only format you can use `-f worstvideo`. + +If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that the slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download. + +If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all three of these formats, provided they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`. + +You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`). + +The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals): + + - `filesize`: The number of bytes, if known in advance + - `width`: Width of the video, if known + - `height`: Height of the video, if known + - `tbr`: Average bitrate of audio and video in KBit/s + - `abr`: Average audio bitrate in KBit/s + - `vbr`: Average video bitrate in KBit/s + - `asr`: Audio sampling rate in Hertz + - `fps`: Frame rate + +Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and the following string meta fields: + + - `ext`: File extension + - `acodec`: Name of the audio codec in use + - `vcodec`: Name of the video codec in use + - `container`: Name of the container format + - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`) + - `format_id`: A short description of the format + +Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain). + +Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by a particular extractor, i.e. the metadata offered by the video hoster. + +Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s. + +You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv to be installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv. + +Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`. + +Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/ytdl-org/youtube-dl/issues/5447), [#5456](https://github.com/ytdl-org/youtube-dl/issues/5456)). If ffmpeg or avconv is installed, this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (most likely to pipe it to your media player), i.e. you explicitly specify the output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player rather than waiting until `bestvideo` and `bestaudio` are downloaded and muxed. + +If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl. + +#### Format selection examples + +Note that on Windows you may need to use double quotes instead of single. + +```bash +# Download best mp4 format available or any other best if no mp4 available +$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' + +# Download best format available but no better than 480p +$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]' + +# Download best format available but no bigger than 50 MB +$ youtube-dl -f 'best[filesize<50M]' + +# Download best format available via direct link over HTTP/HTTPS protocol +$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]' + +# Download the best video format and the best audio format without merging them +$ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s' +``` +Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name. + + +# VIDEO SELECTION + +Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`. They accept dates in two formats: + + - Absolute dates: Dates in the format `YYYYMMDD`.
+ - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?` + +Examples: + +```bash +# Download only the videos uploaded in the last 6 months +$ youtube-dl --dateafter now-6months + +# Download only the videos uploaded on January 1, 1970 +$ youtube-dl --date 19700101 + +# Download only the videos uploaded in the 200x decade +$ youtube-dl --dateafter 20000101 --datebefore 20091231 +``` + +# FAQ + +### How do I update youtube-dl? + +If you've followed [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`). + +If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update. + +If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bug tracker or support forum. + +As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like + + sudo apt-get remove -y youtube-dl + +Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html): + +``` +sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl +sudo chmod a+x /usr/local/bin/youtube-dl +hash -r +``` + +Again, from then on you'll be able to update with `sudo youtube-dl -U`. + +### youtube-dl is extremely slow to start on Windows + +Add a file exclusion for `youtube-dl.exe` in Windows Defender settings. + +### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists + +YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. + +If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging people](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update. + +### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number` + +Make sure you are not using `-o` together with any of the options `-t`, `--title`, `--id`, `-A` or `--auto-number`, whether on the command line or in a configuration file. Remove whichever is present. + +### Do I always have to pass `-citw`? + +By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
+ +### Can you please put the `-b` option back? + +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, YouTube may not report them to be available in the specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it. + +### I get HTTP error 402 when trying to download a video. What's this? + +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering providing a way to let you solve the CAPTCHA](https://github.com/ytdl-org/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restarting youtube-dl. + +### Do I need any other programs? + +youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option. + +Videos or video formats streamed via the RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed. + +### I have downloaded a video but how can I play it? + +Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/). + +### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser. + +It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`. + +It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule. + +Please bear in mind that some URL protocols are **not** supported by browsers out of the box, including RTMP. If you are using `-g`, your own downloader must support these as well. + +If you want to play the video on a machine that is not running youtube-dl, you can relay the video content from the machine that runs youtube-dl. You can use `-o -` to let youtube-dl stream a video to stdout, or simply allow the player to download the files written by youtube-dl in turn. + +### ERROR: no fmt_url_map or conn information found in video info + +YouTube switched to a new video info format in July 2011, which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
+ +### ERROR: unable to download video + +Since September 2012, YouTube requires an additional signature, which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. + +### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command` + +That's actually the output from your shell. Since the ampersand is one of the special shell characters, it's interpreted by the shell, preventing you from passing the whole URL to youtube-dl. To keep your shell from interpreting the ampersands (or any other special characters), either put the whole URL in quotes or escape them with a backslash (which approach works depends on your shell). + +For example, if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with the following command: + +```youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'``` + +or + +```youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc``` + +On Windows you have to use double quotes: + +```youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"``` + +### ExtractorError: Could not find JS function u'OF' + +In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. + +### HTTP Error 429: Too Many Requests or 402: Payment Required + +These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address. + +### SyntaxError: Non-ASCII character + +The error + + File "youtube-dl", line 2 + SyntaxError: Non-ASCII character '\x93' ... + +means you're using an outdated version of Python. Please update to Python 2.6 or 2.7. + +### What is this binary file? Where has the code gone? + +Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) youtube-dl has been packed as an executable zipfile; simply unzip it (you might need to rename it to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. + +### The exe throws an error due to missing `MSVCR100.dll` + +To run the exe you first need to install the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555). + +### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files? + +If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome. + +To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory. + +From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
+ +### How do I put downloads into a specific folder? + +Use the `-o` option to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration). + +### How do I download a video starting with a `-`? + +Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`: + + youtube-dl -- -wNyEUrxzFU + youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU" + +### How do I pass cookies to youtube-dl? + +Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`. + +To extract cookies from your browser, use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox). + +Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure the cookies file uses the correct [newline format](https://en.wikipedia.org/wiki/Newline) and convert newlines if necessary to match your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of an invalid newline format. + +Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around the [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) that some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare). + +### How do I stream directly to media player? + +You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming), and then pipe the former to the latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with: + + youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc - + +### How do I download only new videos from a playlist? + +Use the download-archive feature: initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt`, which will record the identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file. + +For example, at first, + + youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" + +will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos, if any: + + youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re" + +### Should I add `--hls-prefer-native` into my config? + +When youtube-dl detects an HLS video, it can download it either with the built-in downloader or with ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/youtube-dl each handle some invalid cases better than the other, there is an option to switch the downloader if needed.
+ +When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg. + +In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://ytdl-org.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader. + +If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case. + +### Can you add support for this anime video site, or site which shows current movies for free? + +As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion in youtube-dl. + +A note on the service saying that it doesn't host the infringing content but merely links to those who do is evidence that the service should **not** be included in youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization. + +Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content. + +### How can I speed up work on my issue? + +(Also known as: Help, my important issue is not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up the resolution of your issue, here's what you can do: + +First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point of contact. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel. + +Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues. + +If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so). + +Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`. + +### How can I detect whether a given URL is supported by youtube-dl?
+ +For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug. + +It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract videos from a service that hosts them itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. + +If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or by catching an `UnsupportedError` exception if you run it from a Python program. + +# Why do I need to go through that much red tape when filing bugs? + +Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl. + +youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident that we can reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current. + +# DEVELOPER INSTRUCTIONS + +Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution. + +To run youtube-dl as a developer, you don't need to build anything either. Simply execute + + python -m youtube_dl + +To run the tests, simply invoke your favorite test runner, or execute a test file directly; any of the following work: + + python -m unittest discover + python test/test_download.py + nosetests + +See item 6 of the [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases. + +If you want to create a build of youtube-dl yourself, you'll need + +* python +* make (only GNU make is supported) +* pandoc +* zip +* nosetests + +### Adding support for a new site + +If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**.
youtube-dl does **not support** such sites; thus pull requests adding support for them **will be rejected**. + +After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): + +1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork) +2. Check out the source code with: + + git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git + +3. Start a new git branch with + + cd youtube-dl + git checkout -b yourextractor + +4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`: + + ```python + # coding: utf-8 + from __future__ import unicode_literals + + from .common import InfoExtractor + + + class YourExtractorIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://yourextractor.com/watch/42', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '42', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # TODO more code goes here, for example ... + title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title') + + return { + 'id': video_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False), + # TODO more properties (see youtube_dl/extractor/common.py) + } + ``` +5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py). +6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test dict are not counted in the total. +7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want. +8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart): + + $ flake8 youtube_dl/extractor/yourextractor.py + +9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+. +10.
When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this: + + $ git add youtube_dl/extractor/extractors.py + $ git add youtube_dl/extractor/yourextractor.py + $ git commit -m '[yourextractor] Add new extractor' + $ git push origin yourextractor + +11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it. + +In any case, thank you very much for your contributions! + +## youtube-dl coding conventions + +This section introduces guidelines for writing idiomatic, robust and future-proof extractor code. + +Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters, which is out of your control and tends to change. As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to anticipate potential future changes. This is important because it will allow the extractor not to break on minor layout changes, thus keeping old youtube-dl versions working. Even though this breakage issue is easily fixed by releasing a new version of youtube-dl with the fix incorporated, all the previous versions remain broken in all repositories and distros' packages, which may not be so prompt in fetching the update from us. Needless to say, some non-rolling-release distros may never receive an update at all. + +### Mandatory and optional metafields + +For extraction to work, youtube-dl relies on the metadata your extractor extracts and provides to youtube-dl, expressed as an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by youtube-dl: + + - `id` (media identifier) + - `title` (media title) + - `url` (media download URL) or `formats` + +In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data without which extraction does not make sense; if any of them fail to be extracted, the extractor is considered completely broken. + +[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** of situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields. + +#### Example + +Say you have some source dictionary `meta` that you've fetched as JSON with an HTTP request and it has a key `summary`: + +```python +meta = self._download_json(url, video_id) +``` + +Assume at this point `meta`'s layout is: + +```python +{ + ... + "summary": "some fancy summary text", + ... +} +``` + +Assume you want to extract `summary` and put it into the resulting info dict as `description`.
Since `description` is an optional meta field, you should be prepared for this key to be missing from the `meta` dict, so you should extract it like: + +```python +description = meta.get('summary') # correct +``` + +and not like: + +```python +description = meta['summary'] # incorrect +``` + +The latter will break the extraction process with a `KeyError` if `summary` disappears from `meta` at some later time, but with the former approach extraction will just go ahead with `description` set to `None`, which is perfectly fine (remember, `None` is equivalent to the absence of data). + +Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance: + +```python +description = self._search_regex( + r'<span[^>]+id="title"[^>]*>([^<]+)<', + webpage, 'description', fatal=False) +``` + +With `fatal` set to `False`, if `_search_regex` fails to extract `description` it will emit a warning and continue extraction. + +You can also pass `default=<some fallback value>`, for example: + +```python +description = self._search_regex( + r'<span[^>]+id="title"[^>]*>([^<]+)<', + webpage, 'description', default=None) +``` + +On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present. + +### Provide fallbacks + +When extracting metadata try to do so from multiple sources. For example, if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable. + +#### Example + +Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field, you should end up with something like: + +```python +title = meta['title'] +``` + +If `title` disappears from `meta` in the future due to some changes on the hoster's side, the extraction would fail since `title` is mandatory. That's expected. + +Assume you have another source you can extract `title` from, for example the `og:title` HTML meta tag of the `webpage`. In this case you can provide a fallback scenario: + +```python +title = meta.get('title') or self._og_search_title(webpage) +``` + +This code will try to extract from `meta` first and if that fails it will try extracting `og:title` from the `webpage`. + +### Regular expressions + +#### Don't capture groups you don't use + +A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing. + +##### Example + +Don't capture the id attribute name here since you can't use it for anything anyway. + +Correct: + +```python +r'(?:id|ID)=(?P<id>\d+)' +``` + +Incorrect: +```python +r'(id|ID)=(?P<id>\d+)' +``` + + +#### Make regular expressions relaxed and flexible + +When using regular expressions, try to write them in a fuzzy, relaxed and flexible way, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
+
+### Regular expressions
+
+#### Don't capture groups you don't use
+
+A capturing group should be an indication that its contents are used somewhere in the code. Any group that is not used must be made non-capturing.
+
+##### Example
+
+Don't capture the `id` attribute name here, since you can't use it for anything anyway.
+
+Correct:
+
+```python
+r'(?:id|ID)=(?P<id>\d+)'
+```
+
+Incorrect:
+
+```python
+r'(id|ID)=(?P<id>\d+)'
+```
+
+#### Make regular expressions relaxed and flexible
+
+When using regular expressions try to write them in a fuzzy, relaxed and flexible way, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.
+
+##### Example
+
+Say you need to extract `title` from the following HTML code:
+
+```html
+<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
+```
+
+The code for that task should look similar to:
+
+```python
+title = self._search_regex(
+    r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
+```
+
+Or even better:
+
+```python
+title = self._search_regex(
+    r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
+    webpage, 'title', group='title')
+```
+
+Note how this tolerates potential changes in the `style` attribute's value or a switch from double quotes to single quotes for the `class` attribute.
+
+The code definitely should not look like:
+
+```python
+title = self._search_regex(
+    r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
+    webpage, 'title', group='title')
+```
+
+### Long lines policy
+
+There is a soft limit to keep lines of code under 80 characters long. It should be respected if possible, but not if doing so makes readability or code maintenance worse.
+
+For example, you should **never** split long string literals like URLs or other often-copied entities over multiple lines to fit this limit:
+
+Correct:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+Incorrect:
+
+```python
+'https://www.youtube.com/watch?v=FqZTN594JQw&list='
+'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
+```
+
+### Use convenience conversion and parsing functions
+
+Wrap all extracted numeric data in the safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string-to-number conversions as well.
+
+Use `url_or_none` for safe URL processing.
+
+Use `try_get` for safe metadata extraction from parsed JSON.
+
+Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta field extraction, `parse_resolution` for resolution extraction, `parse_duration` for `duration` extraction and `parse_age_limit` for `age_limit` extraction.
+
+Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
+
+#### More examples
+
+##### Safely extract optional description from parsed JSON
+
+```python
+description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
+```
+
+##### Safely extract more optional metadata
+
+```python
+video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
+description = video.get('summary')
+duration = float_or_none(video.get('durationMs'), scale=1000)
+view_count = int_or_none(video.get('views'))
+```
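+
+##### Safely parse dates and durations
+
+Building on the `video` dict from the previous example, here is an illustrative sketch of how the parsing helpers mentioned above normalize date and duration strings (the `publishedAt` and `length` keys are hypothetical):
+
+```python
+upload_date = unified_strdate(video.get('publishedAt'))  # '2019-07-05' -> '20190705'
+duration = parse_duration(video.get('length'))  # '1:10:05' -> 4205
+```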
+
+# EMBEDDING YOUTUBE-DL
+
+youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
+
+From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
+
+```python
+from __future__ import unicode_literals
+import youtube_dl
+
+ydl_opts = {}
+with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```
+
+Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
+
+Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
+
+```python
+from __future__ import unicode_literals
+import youtube_dl
+
+
+class MyLogger(object):
+    def debug(self, msg):
+        pass
+
+    def warning(self, msg):
+        pass
+
+    def error(self, msg):
+        print(msg)
+
+
+def my_hook(d):
+    if d['status'] == 'finished':
+        print('Done downloading, now converting ...')
+
+
+ydl_opts = {
+    'format': 'bestaudio/best',
+    'postprocessors': [{
+        'key': 'FFmpegExtractAudio',
+        'preferredcodec': 'mp3',
+        'preferredquality': '192',
+    }],
+    'logger': MyLogger(),
+    'progress_hooks': [my_hook],
+}
+with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
+```
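+
+If you only need the extracted metadata and don't want to download anything, one approach (a sketch, not the only way) is to call `extract_info` with `download=False`:
+
+```python
+import youtube_dl
+
+with youtube_dl.YoutubeDL({}) as ydl:
+    info = ydl.extract_info(
+        'https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
+    # info is a regular info dict; the usual fields apply
+    print(info.get('title'), info.get('duration'))
+```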
+
+# BUGS
+
+Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
+
+**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** the `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
+
+```
+$ youtube-dl -v <your command line>
+[debug] System config: []
+[debug] User config: []
+[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
+[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
+[debug] youtube-dl version 2015.12.06
+[debug] Git HEAD: 135392e
+[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
+[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
+[debug] Proxy map: {}
+...
+```
+
+**Do not post screenshots of verbose logs; only plain text is acceptable.**
+
+The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
+
+Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
+
+### Is the description of the issue itself sufficient?
+
+We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
+
+So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
+
+- What the problem is
+- How it could be fixed
+- What your proposed solution would look like
+
+If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to. We're often too polite to close the issue outright, but the missing information makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
+
+For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
+
+If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield rather large output; redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
+
+**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
+
+### Are you using the latest version?
+
+Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive concern issues that have already been fixed but are reported by people running outdated versions. This goes for feature requests as well.
+
+### Is the issue already documented?
+
+Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post into them often spurs rapid activity.
+
+### Why are existing options not enough?
+
+Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
+
+### Is there enough context in your bug report?
+
+People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) into a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: one simple, and one impossible (or extremely complicated) one.
+
+We are then presented with a very complicated request when the original problem could be solved far more easily, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
+
+### Does the issue involve one problem, and one problem only?
+
+Some of our users seem to think there is a limit to the number of issues they can or should open.
There is no such limit. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
+
+In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for Vimeo user videos, White House podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
+
+### Is anyone going to need the feature?
+
+Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
+
+### Is your question about youtube-dl?
+
+It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
+
+# COPYRIGHT
+
+youtube-dl is released into the public domain by the copyright holders.
+
+This README file was originally written by [Daniel Bolton](https://github.com/dbbolton) and is likewise released into the public domain.
diff --git a/bin/youtube-dl b/bin/youtube-dl new file mode 100755 index 000000000..fc3cc8ad8 --- /dev/null +++ b/bin/youtube-dl @@ -0,0 +1,6 @@ +#!/usr/bin/env python + +import youtube_dl + +if __name__ == '__main__': + youtube_dl.main() diff --git a/devscripts/SizeOfImage.patch b/devscripts/SizeOfImage.patch new file mode 100644 index 0000000000000000000000000000000000000000..d5845af4641a3a4028d70fe47ece829bcbdad4e2 GIT binary patch literal 147 zcmZ<?cJXv`Gchn?fB<7C&ENuII7MYx8ahU*1xJ2PUzE$hz~bVPARxlvz`)3$z@WHn z21kPM#|vGlhu(QC=Ms|3`Qp0(p~G$(Gamy3;|YdC3`{JH9108yOMRtym~)=+zEpjC UNM5|i)goX5ND)X2ga@(|07LX6`v3p{ literal 0 HcmV?d00001 diff --git a/devscripts/SizeOfImage_w.patch b/devscripts/SizeOfImage_w.patch new file mode 100644 index 0000000000000000000000000000000000000000..c1a338ff3e2927ff28f00cc011686307925adcd5 GIT binary patch literal 148 zcmZ<?cJXv`Gchn?fB+LH&ENuII7MYx8ahU*1xIEt*89!Cz~bVOARxlvz`($$z@WHn z21kPM#|vGlhu(QC=Mv&ANxh$d&~a_a4||}x6AT9!7}yv&6c`N7G6YSjTef>a{;KY> VM{?ptt`-3kK&n7`AUp;j008^TDJB2_ literal 0 HcmV?d00001 diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in new file mode 100644 index 000000000..28bd23727 --- /dev/null +++ b/devscripts/bash-completion.in @@ -0,0 +1,29 @@ +__youtube_dl() +{ + local cur prev opts fileopts diropts keywords + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + opts="{{flags}}" + keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" + fileopts="-a|--batch-file|--download-archive|--cookies|--load-info" + diropts="--cache-dir" + + if [[ ${prev} =~ ${fileopts} ]]; then + COMPREPLY=( $(compgen -f -- ${cur}) ) + return 0 + elif [[ ${prev} =~ ${diropts} ]]; then + COMPREPLY=( $(compgen -d -- ${cur}) ) + return 0 + fi + + if [[ ${cur} =~ : ]]; then + COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) ) + return 0 + elif [[ ${cur} == * ]] ; then + COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) + return 0 + fi +} + +complete -F __youtube_dl youtube-dl diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py new file mode 100755 index 000000000..3d1391334 --- /dev/null +++ b/devscripts/bash-completion.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import os +from os.path import dirname as dirn +import sys + +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +import youtube_dl + +BASH_COMPLETION_FILE = "youtube-dl.bash-completion" +BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in" + + +def build_completion(opt_parser): + opts_flag = [] + for group in opt_parser.option_groups: + for option in group.option_list: + # for every long flag + opts_flag.append(option.get_opt_string()) + with open(BASH_COMPLETION_TEMPLATE) as f: + template = f.read() + with open(BASH_COMPLETION_FILE, "w") as f: + # just using the special char + filled_template = template.replace("{{flags}}", " ".join(opts_flag)) + f.write(filled_template) + + +parser = youtube_dl.parseOpts()[0] +build_completion(parser) diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py new file mode 100644 index 000000000..4a4295ba9 --- /dev/null +++ b/devscripts/buildserver.py @@ -0,0 +1,433 @@ +#!/usr/bin/python3 + +import argparse +import ctypes +import functools +import shutil +import subprocess +import sys +import tempfile +import threading +import traceback +import os.path + +sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__))))) +from youtube_dl.compat import ( + compat_input, + compat_http_server, + 
compat_str, + compat_urlparse, +) + +# These are not used outside of buildserver.py thus not in compat.py + +try: + import winreg as compat_winreg +except ImportError: # Python 2 + import _winreg as compat_winreg + +try: + import socketserver as compat_socketserver +except ImportError: # Python 2 + import SocketServer as compat_socketserver + + +class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer): + allow_reuse_address = True + + +advapi32 = ctypes.windll.advapi32 + +SC_MANAGER_ALL_ACCESS = 0xf003f +SC_MANAGER_CREATE_SERVICE = 0x02 +SERVICE_WIN32_OWN_PROCESS = 0x10 +SERVICE_AUTO_START = 0x2 +SERVICE_ERROR_NORMAL = 0x1 +DELETE = 0x00010000 +SERVICE_STATUS_START_PENDING = 0x00000002 +SERVICE_STATUS_RUNNING = 0x00000004 +SERVICE_ACCEPT_STOP = 0x1 + +SVCNAME = 'youtubedl_builder' + +LPTSTR = ctypes.c_wchar_p +START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR)) + + +class SERVICE_TABLE_ENTRY(ctypes.Structure): + _fields_ = [ + ('lpServiceName', LPTSTR), + ('lpServiceProc', START_CALLBACK) + ] + + +HandlerEx = ctypes.WINFUNCTYPE( + ctypes.c_int, # return + ctypes.c_int, # dwControl + ctypes.c_int, # dwEventType + ctypes.c_void_p, # lpEventData, + ctypes.c_void_p, # lpContext, +) + + +def _ctypes_array(c_type, py_array): + ar = (c_type * len(py_array))() + ar[:] = py_array + return ar + + +def win_OpenSCManager(): + res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS) + if not res: + raise Exception('Opening service manager failed - ' + 'are you running this as administrator?') + return res + + +def win_install_service(service_name, cmdline): + manager = win_OpenSCManager() + try: + h = advapi32.CreateServiceW( + manager, service_name, None, + SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS, + SERVICE_AUTO_START, SERVICE_ERROR_NORMAL, + cmdline, None, None, None, None, None) + if not h: + raise OSError('Service creation failed: %s' % ctypes.FormatError()) + + advapi32.CloseServiceHandle(h) + finally: + advapi32.CloseServiceHandle(manager) + + +def win_uninstall_service(service_name): + manager = win_OpenSCManager() + try: + h = advapi32.OpenServiceW(manager, service_name, DELETE) + if not h: + raise OSError('Could not find service %s: %s' % ( + service_name, ctypes.FormatError())) + + try: + if not advapi32.DeleteService(h): + raise OSError('Deletion failed: %s' % ctypes.FormatError()) + finally: + advapi32.CloseServiceHandle(h) + finally: + advapi32.CloseServiceHandle(manager) + + +def win_service_report_event(service_name, msg, is_error=True): + with open('C:/sshkeys/log', 'a', encoding='utf-8') as f: + f.write(msg + '\n') + + event_log = advapi32.RegisterEventSourceW(None, service_name) + if not event_log: + raise OSError('Could not report event: %s' % ctypes.FormatError()) + + try: + type_id = 0x0001 if is_error else 0x0004 + event_id = 0xc0000000 if is_error else 0x40000000 + lines = _ctypes_array(LPTSTR, [msg]) + + if not advapi32.ReportEventW( + event_log, type_id, 0, event_id, None, len(lines), 0, + lines, None): + raise OSError('Event reporting failed: %s' % ctypes.FormatError()) + finally: + advapi32.DeregisterEventSource(event_log) + + +def win_service_handler(stop_event, *args): + try: + raise ValueError('Handler called with args ' + repr(args)) + TODO + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise + + +def win_service_set_status(handle, status_code): + svcStatus = SERVICE_STATUS() + 
svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS + svcStatus.dwCurrentState = status_code + svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP + + svcStatus.dwServiceSpecificExitCode = 0 + + if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)): + raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError()) + + +def win_service_main(service_name, real_main, argc, argv_raw): + try: + # args = [argv_raw[i].value for i in range(argc)] + stop_event = threading.Event() + handler = HandlerEx(functools.partial(stop_event, win_service_handler)) + h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None) + if not h: + raise OSError('Handler registration failed: %s' % + ctypes.FormatError()) + + TODO + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise + + +def win_service_start(service_name, real_main): + try: + cb = START_CALLBACK( + functools.partial(win_service_main, service_name, real_main)) + dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [ + SERVICE_TABLE_ENTRY( + service_name, + cb + ), + SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK)) + ]) + + if not advapi32.StartServiceCtrlDispatcherW(dispatch_table): + raise OSError('ctypes start failed: %s' % ctypes.FormatError()) + except Exception as e: + tb = traceback.format_exc() + msg = str(e) + '\n' + tb + win_service_report_event(service_name, msg, is_error=True) + raise + + +def main(args=None): + parser = argparse.ArgumentParser() + parser.add_argument('-i', '--install', + action='store_const', dest='action', const='install', + help='Launch at Windows startup') + parser.add_argument('-u', '--uninstall', + action='store_const', dest='action', const='uninstall', + help='Remove Windows service') + parser.add_argument('-s', '--service', + action='store_const', dest='action', const='service', + help='Run as a Windows service') + parser.add_argument('-b', '--bind', metavar='<host:port>', + action='store', default='0.0.0.0:8142', + help='Bind to host:port (default %default)') + options = parser.parse_args(args=args) + + if options.action == 'install': + fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox') + cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind) + win_install_service(SVCNAME, cmdline) + return + + if options.action == 'uninstall': + win_uninstall_service(SVCNAME) + return + + if options.action == 'service': + win_service_start(SVCNAME, main) + return + + host, port_str = options.bind.split(':') + port = int(port_str) + + print('Listening on %s:%d' % (host, port)) + srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler) + thr = threading.Thread(target=srv.serve_forever) + thr.start() + compat_input('Press ENTER to shut down') + srv.shutdown() + thr.join() + + +def rmtree(path): + for name in os.listdir(path): + fname = os.path.join(path, name) + if os.path.isdir(fname): + rmtree(fname) + else: + os.chmod(fname, 0o666) + os.remove(fname) + os.rmdir(path) + + +class BuildError(Exception): + def __init__(self, output, code=500): + self.output = output + self.code = code + + def __str__(self): + return self.output + + +class HTTPError(BuildError): + pass + + +class PythonBuilder(object): + def __init__(self, **kwargs): + python_version = kwargs.pop('python', '3.4') + python_path = None + for node in ('Wow6432Node\\', ''): + try: + key = compat_winreg.OpenKey( + compat_winreg.HKEY_LOCAL_MACHINE, + r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, 
python_version)) + try: + python_path, _ = compat_winreg.QueryValueEx(key, '') + finally: + compat_winreg.CloseKey(key) + break + except Exception: + pass + + if not python_path: + raise BuildError('No such Python version: %s' % python_version) + + self.pythonPath = python_path + + super(PythonBuilder, self).__init__(**kwargs) + + +class GITInfoBuilder(object): + def __init__(self, **kwargs): + try: + self.user, self.repoName = kwargs['path'][:2] + self.rev = kwargs.pop('rev') + except ValueError: + raise BuildError('Invalid path') + except KeyError as e: + raise BuildError('Missing mandatory parameter "%s"' % e.args[0]) + + path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user) + if not os.path.exists(path): + os.makedirs(path) + self.basePath = tempfile.mkdtemp(dir=path) + self.buildPath = os.path.join(self.basePath, 'build') + + super(GITInfoBuilder, self).__init__(**kwargs) + + +class GITBuilder(GITInfoBuilder): + def build(self): + try: + subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath]) + subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath) + except subprocess.CalledProcessError as e: + raise BuildError(e.output) + + super(GITBuilder, self).build() + + +class YoutubeDLBuilder(object): + authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org'] + + def __init__(self, **kwargs): + if self.repoName != 'youtube-dl': + raise BuildError('Invalid repository "%s"' % self.repoName) + if self.user not in self.authorizedUsers: + raise HTTPError('Unauthorized user "%s"' % self.user, 401) + + super(YoutubeDLBuilder, self).__init__(**kwargs) + + def build(self): + try: + proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath) + proc.wait() + #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], + # cwd=self.buildPath) + except subprocess.CalledProcessError as e: + raise BuildError(e.output) + + super(YoutubeDLBuilder, self).build() + + +class DownloadBuilder(object): + def __init__(self, **kwargs): + self.handler = kwargs.pop('handler') + self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:])) + self.srcPath = os.path.abspath(os.path.normpath(self.srcPath)) + if not self.srcPath.startswith(self.buildPath): + raise HTTPError(self.srcPath, 401) + + super(DownloadBuilder, self).__init__(**kwargs) + + def build(self): + if not os.path.exists(self.srcPath): + raise HTTPError('No such file', 404) + if os.path.isdir(self.srcPath): + raise HTTPError('Is a directory: %s' % self.srcPath, 401) + + self.handler.send_response(200) + self.handler.send_header('Content-Type', 'application/octet-stream') + self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1]) + self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size)) + self.handler.end_headers() + + with open(self.srcPath, 'rb') as src: + shutil.copyfileobj(src, self.handler.wfile) + + super(DownloadBuilder, self).build() + + +class CleanupTempDir(object): + def build(self): + try: + rmtree(self.basePath) + except Exception as e: + print('WARNING deleting "%s": %s' % (self.basePath, e)) + + super(CleanupTempDir, self).build() + + +class Null(object): + def __init__(self, **kwargs): + pass + + def start(self): + pass + + def close(self): + pass + + def build(self): + pass + + +class Builder(PythonBuilder, GITBuilder, 
YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null): + pass + + +class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler): + actionDict = {'build': Builder, 'download': Builder} # They're the same, no more caching. + + def do_GET(self): + path = compat_urlparse.urlparse(self.path) + paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()]) + action, _, path = path.path.strip('/').partition('/') + if path: + path = path.split('/') + if action in self.actionDict: + try: + builder = self.actionDict[action](path=path, handler=self, **paramDict) + builder.start() + try: + builder.build() + finally: + builder.close() + except BuildError as e: + self.send_response(e.code) + msg = compat_str(e).encode('UTF-8') + self.send_header('Content-Type', 'text/plain; charset=UTF-8') + self.send_header('Content-Length', len(msg)) + self.end_headers() + self.wfile.write(msg) + else: + self.send_response(500, 'Unknown build method "%s"' % action) + else: + self.send_response(500, 'Malformed URL') + +if __name__ == '__main__': + main() diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py new file mode 100644 index 000000000..740f04de0 --- /dev/null +++ b/devscripts/check-porn.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +""" +This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check +if we are not 'age_limit' tagging some porn site + +A second approach implemented relies on a list of porn domains, to activate it +pass the list filename as the only argument +""" + +# Allow direct execution +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import gettestcases +from youtube_dl.utils import compat_urllib_parse_urlparse +from youtube_dl.utils import compat_urllib_request + +if len(sys.argv) > 1: + METHOD = 'LIST' + LIST = open(sys.argv[1]).read().decode('utf8').strip() +else: + METHOD = 'EURISTIC' + +for test in gettestcases(): + if METHOD == 'EURISTIC': + try: + webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read() + except Exception: + print('\nFail: {0}'.format(test['name'])) + continue + + webpage = webpage.decode('utf8', 'replace') + + RESULT = 'porn' in webpage.lower() + + elif METHOD == 'LIST': + domain = compat_urllib_parse_urlparse(test['url']).netloc + if not domain: + print('\nFail: {0}'.format(test['name'])) + continue + domain = '.'.join(domain.split('.')[-2:]) + + RESULT = ('.' 
+ domain + '\n' in LIST or '\n' + domain + '\n' in LIST)
+
+    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
+                   or test['info_dict']['age_limit'] != 18):
+        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+
+    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
+                         and test['info_dict']['age_limit'] == 18):
+        print('\nPotential false negative: {0}'.format(test['name']))
+
+    else:
+        sys.stdout.write('.')
+        sys.stdout.flush()
+
+print()
diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py
new file mode 100644
index 000000000..428111b3f
--- /dev/null
+++ b/devscripts/create-github-release.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import base64
+import io
+import json
+import mimetypes
+import netrc
+import optparse
+import os
+import re
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from youtube_dl.compat import (
+    compat_basestring,
+    compat_input,
+    compat_getpass,
+    compat_print,
+    compat_urllib_request,
+)
+from youtube_dl.utils import (
+    make_HTTPS_handler,
+    sanitized_Request,
+)
+
+
+class GitHubReleaser(object):
+    _API_URL = 'https://api.github.com/repos/ytdl-org/youtube-dl/releases'
+    _UPLOADS_URL = 'https://uploads.github.com/repos/ytdl-org/youtube-dl/releases/%s/assets?name=%s'
+    _NETRC_MACHINE = 'github.com'
+
+    def __init__(self, debuglevel=0):
+        self._init_github_account()
+        https_handler = make_HTTPS_handler({}, debuglevel=debuglevel)
+        self._opener = compat_urllib_request.build_opener(https_handler)
+
+    def _init_github_account(self):
+        try:
+            info = netrc.netrc().authenticators(self._NETRC_MACHINE)
+            if info is not None:
+                self._username = info[0]
+                self._password = info[2]
+                compat_print('Using GitHub credentials found in .netrc...')
+                return
+            else:
+                compat_print('No GitHub credentials found in .netrc')
+        except (IOError, netrc.NetrcParseError):
+            compat_print('Unable to parse .netrc')
+        self._username = compat_input(
+            'Type your GitHub username or email address and press [Return]: ')
+        self._password = compat_getpass(
+            'Type your GitHub password and press [Return]: ')
+
+    def _call(self, req):
+        if isinstance(req, compat_basestring):
+            req = sanitized_Request(req)
+        # Authorizing manually since GitHub does not respond with 401 with
+        # WWW-Authenticate header set (see
+        # https://developer.github.com/v3/#basic-authentication)
+        b64 = base64.b64encode(
+            ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
+        req.add_header('Authorization', 'Basic %s' % b64)
+        response = self._opener.open(req).read().decode('utf-8')
+        return json.loads(response)
+
+    def list_releases(self):
+        return self._call(self._API_URL)
+
+    def create_release(self, tag_name, name=None, body='', draft=False, prerelease=False):
+        data = {
+            'tag_name': tag_name,
+            'target_commitish': 'master',
+            'name': name,
+            'body': body,
+            'draft': draft,
+            'prerelease': prerelease,
+        }
+        req = sanitized_Request(self._API_URL, json.dumps(data).encode('utf-8'))
+        return self._call(req)
+
+    def create_asset(self, release_id, asset):
+        asset_name = os.path.basename(asset)
+        url = self._UPLOADS_URL % (release_id, asset_name)
+        # Our files are small enough to be loaded directly into memory.
+ data = open(asset, 'rb').read() + req = sanitized_Request(url, data) + mime_type, _ = mimetypes.guess_type(asset_name) + req.add_header('Content-Type', mime_type or 'application/octet-stream') + return self._call(req) + + +def main(): + parser = optparse.OptionParser(usage='%prog CHANGELOG VERSION BUILDPATH') + options, args = parser.parse_args() + if len(args) != 3: + parser.error('Expected a version and a build directory') + + changelog_file, version, build_path = args + + with io.open(changelog_file, encoding='utf-8') as inf: + changelog = inf.read() + + mobj = re.search(r'(?s)version %s\n{2}(.+?)\n{3}' % version, changelog) + body = mobj.group(1) if mobj else '' + + releaser = GitHubReleaser() + + new_release = releaser.create_release( + version, name='youtube-dl %s' % version, body=body) + release_id = new_release['id'] + + for asset in os.listdir(build_path): + compat_print('Uploading %s...' % asset) + releaser.create_asset(release_id, os.path.join(build_path, asset)) + + +if __name__ == '__main__': + main() diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in new file mode 100644 index 000000000..eb79765da --- /dev/null +++ b/devscripts/fish-completion.in @@ -0,0 +1,5 @@ + +{{commands}} + + +complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory" diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py new file mode 100755 index 000000000..51d19dd33 --- /dev/null +++ b/devscripts/fish-completion.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import optparse +import os +from os.path import dirname as dirn +import sys + +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +import youtube_dl +from youtube_dl.utils import shell_quote + +FISH_COMPLETION_FILE = 'youtube-dl.fish' +FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in' + +EXTRA_ARGS = { + 'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'], + + # Options that need a file parameter + 'download-archive': ['--require-parameter'], + 'cookies': ['--require-parameter'], + 'load-info': ['--require-parameter'], + 'batch-file': ['--require-parameter'], +} + + +def build_completion(opt_parser): + commands = [] + + for group in opt_parser.option_groups: + for option in group.option_list: + long_option = option.get_opt_string().strip('-') + complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option] + if option._short_opts: + complete_cmd += ['--short-option', option._short_opts[0].strip('-')] + if option.help != optparse.SUPPRESS_HELP: + complete_cmd += ['--description', option.help] + complete_cmd.extend(EXTRA_ARGS.get(long_option, [])) + commands.append(shell_quote(complete_cmd)) + + with open(FISH_COMPLETION_TEMPLATE) as f: + template = f.read() + filled_template = template.replace('{{commands}}', '\n'.join(commands)) + with open(FISH_COMPLETION_FILE, 'w') as f: + f.write(filled_template) + + +parser = youtube_dl.parseOpts()[0] +build_completion(parser) diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py new file mode 100644 index 000000000..e3df42cc2 --- /dev/null +++ b/devscripts/generate_aes_testdata.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +import codecs +import subprocess + +import os +import sys +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import intlist_to_bytes +from youtube_dl.aes import aes_encrypt, key_expansion 
+ +secret_msg = b'Secret message goes here' + + +def hex_str(int_list): + return codecs.encode(intlist_to_bytes(int_list), 'hex') + + +def openssl_encode(algo, key, iv): + cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)] + prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + out, _ = prog.communicate(secret_msg) + return out + + +iv = key = [0x20, 0x15] + 14 * [0] + +r = openssl_encode('aes-128-cbc', key, iv) +print('aes_cbc_decrypt') +print(repr(r)) + +password = key +new_key = aes_encrypt(password, key_expansion(password)) +r = openssl_encode('aes-128-ctr', new_key, iv) +print('aes_decrypt_text 16') +print(repr(r)) + +password = key + 16 * [0] +new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16) +r = openssl_encode('aes-256-ctr', new_key, iv) +print('aes_decrypt_text 32') +print(repr(r)) diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py new file mode 100755 index 000000000..867ea0048 --- /dev/null +++ b/devscripts/gh-pages/add-version.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +from __future__ import unicode_literals + +import json +import sys +import hashlib +import os.path + + +if len(sys.argv) <= 1: + print('Specify the version number as parameter') + sys.exit() +version = sys.argv[1] + +with open('update/LATEST_VERSION', 'w') as f: + f.write(version) + +versions_info = json.load(open('update/versions.json')) +if 'signature' in versions_info: + del versions_info['signature'] + +new_version = {} + +filenames = { + 'bin': 'youtube-dl', + 'exe': 'youtube-dl.exe', + 'tar': 'youtube-dl-%s.tar.gz' % version} +build_dir = os.path.join('..', '..', 'build', version) +for key, filename in filenames.items(): + url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename) + fn = os.path.join(build_dir, filename) + with open(fn, 'rb') as f: + data = f.read() + if not data: + raise ValueError('File %s is empty!' 
% fn) + sha256sum = hashlib.sha256(data).hexdigest() + new_version[key] = (url, sha256sum) + +versions_info['versions'][version] = new_version +versions_info['latest'] = version + +with open('update/versions.json', 'w') as jsonf: + json.dump(versions_info, jsonf, indent=4, sort_keys=True) diff --git a/devscripts/gh-pages/generate-download.py b/devscripts/gh-pages/generate-download.py new file mode 100755 index 000000000..a873d32ee --- /dev/null +++ b/devscripts/gh-pages/generate-download.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +from __future__ import unicode_literals + +import json + +versions_info = json.load(open('update/versions.json')) +version = versions_info['latest'] +version_dict = versions_info['versions'][version] + +# Read template page +with open('download.html.in', 'r', encoding='utf-8') as tmplf: + template = tmplf.read() + +template = template.replace('@PROGRAM_VERSION@', version) +template = template.replace('@PROGRAM_URL@', version_dict['bin'][0]) +template = template.replace('@PROGRAM_SHA256SUM@', version_dict['bin'][1]) +template = template.replace('@EXE_URL@', version_dict['exe'][0]) +template = template.replace('@EXE_SHA256SUM@', version_dict['exe'][1]) +template = template.replace('@TAR_URL@', version_dict['tar'][0]) +template = template.replace('@TAR_SHA256SUM@', version_dict['tar'][1]) +with open('download.html', 'w', encoding='utf-8') as dlf: + dlf.write(template) diff --git a/devscripts/gh-pages/sign-versions.py b/devscripts/gh-pages/sign-versions.py new file mode 100755 index 000000000..fa389c358 --- /dev/null +++ b/devscripts/gh-pages/sign-versions.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +from __future__ import unicode_literals, with_statement + +import rsa +import json +from binascii import hexlify + +try: + input = raw_input +except NameError: + pass + +versions_info = json.load(open('update/versions.json')) +if 'signature' in versions_info: + del versions_info['signature'] + +print('Enter the PKCS1 private key, followed by a blank line:') +privkey = b'' +while True: + try: + line = input() + except EOFError: + break + if line == '': + break + privkey += line.encode('ascii') + b'\n' +privkey = rsa.PrivateKey.load_pkcs1(privkey) + +signature = hexlify(rsa.pkcs1.sign(json.dumps(versions_info, sort_keys=True).encode('utf-8'), privkey, 'SHA-256')).decode() +print('signature: ' + signature) + +versions_info['signature'] = signature +with open('update/versions.json', 'w') as versionsf: + json.dump(versions_info, versionsf, indent=4, sort_keys=True) diff --git a/devscripts/gh-pages/update-copyright.py b/devscripts/gh-pages/update-copyright.py new file mode 100755 index 000000000..61487f925 --- /dev/null +++ b/devscripts/gh-pages/update-copyright.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import with_statement, unicode_literals + +import datetime +import glob +import io # For Python 2 compatibility +import os +import re + +year = str(datetime.datetime.now().year) +for fn in glob.glob('*.html*'): + with io.open(fn, encoding='utf-8') as f: + content = f.read() + newc = re.sub(r'(?P<copyright>Copyright © 2011-)(?P<year>[0-9]{4})', 'Copyright © 2011-' + year, content) + if content != newc: + tmpFn = fn + '.part' + with io.open(tmpFn, 'wt', encoding='utf-8') as outf: + outf.write(newc) + os.rename(tmpFn, fn) diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py new file mode 100755 index 000000000..506a62377 --- /dev/null +++ b/devscripts/gh-pages/update-feed.py @@ -0,0 +1,76 @@ +#!/usr/bin/env 
python3
+from __future__ import unicode_literals
+
+import datetime
+import io
+import json
+import textwrap
+
+
+atom_template = textwrap.dedent("""\
+    <?xml version="1.0" encoding="utf-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom">
+        <link rel="self" href="http://ytdl-org.github.io/youtube-dl/update/releases.atom" />
+        <title>youtube-dl releases</title>
+        <id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
+        <updated>@TIMESTAMP@</updated>
+        @ENTRIES@
+    </feed>""")
+
+entry_template = textwrap.dedent("""
+    <entry>
+        <id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
+        <title>New version @VERSION@</title>
+        <link href="http://ytdl-org.github.io/youtube-dl" />
+        <content type="xhtml">
+            <div xmlns="http://www.w3.org/1999/xhtml">
+                Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
+            </div>
+        </content>
+        <author>
+            <name>The youtube-dl maintainers</name>
+        </author>
+        <updated>@TIMESTAMP@</updated>
+    </entry>
+ """) + +now = datetime.datetime.now() +now_iso = now.isoformat() + 'Z' + +atom_template = atom_template.replace('@TIMESTAMP@', now_iso) + +versions_info = json.load(open('update/versions.json')) +versions = list(versions_info['versions'].keys()) +versions.sort() + +entries = [] +for v in versions: + fields = v.split('.') + year, month, day = map(int, fields[:3]) + faked = 0 + patchlevel = 0 + while True: + try: + datetime.date(year, month, day) + except ValueError: + day -= 1 + faked += 1 + assert day > 0 + continue + break + if len(fields) >= 4: + try: + patchlevel = int(fields[3]) + except ValueError: + patchlevel = 1 + timestamp = '%04d-%02d-%02dT00:%02d:%02dZ' % (year, month, day, faked, patchlevel) + + entry = entry_template.replace('@TIMESTAMP@', timestamp) + entry = entry.replace('@VERSION@', v) + entries.append(entry) + +entries_str = textwrap.indent(''.join(entries), '\t') +atom_template = atom_template.replace('@ENTRIES@', entries_str) + +with io.open('update/releases.atom', 'w', encoding='utf-8') as atom_file: + atom_file.write(atom_template) diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py new file mode 100755 index 000000000..531c93c70 --- /dev/null +++ b/devscripts/gh-pages/update-sites.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +from __future__ import unicode_literals + +import sys +import os +import textwrap + +# We must be able to import youtube_dl +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) + +import youtube_dl + + +def main(): + with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf: + template = tmplf.read() + + ie_htmls = [] + for ie in youtube_dl.list_extractors(age_limit=None): + ie_html = '{}'.format(ie.IE_NAME) + ie_desc = getattr(ie, 'IE_DESC', None) + if ie_desc is False: + continue + elif ie_desc is not None: + ie_html += ': {}'.format(ie.IE_DESC) + if not ie.working(): + ie_html += ' (Currently broken)' + ie_htmls.append('
  • {}
  • '.format(ie_html)) + + template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t')) + + with open('supportedsites.html', 'w', encoding='utf-8') as sitesf: + sitesf.write(template) + + +if __name__ == '__main__': + main() diff --git a/devscripts/install_jython.sh b/devscripts/install_jython.sh new file mode 100755 index 000000000..bafca4da4 --- /dev/null +++ b/devscripts/install_jython.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +wget http://central.maven.org/maven2/org/python/jython-installer/2.7.1/jython-installer-2.7.1.jar +java -jar jython-installer-2.7.1.jar -s -d "$HOME/jython" +$HOME/jython/bin/jython -m pip install nose diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py new file mode 100644 index 000000000..c4e5fc1f4 --- /dev/null +++ b/devscripts/lazy_load_template.py @@ -0,0 +1,19 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + + +class LazyLoadExtractor(object): + _module = None + + @classmethod + def ie_key(cls): + return cls.__name__[:-2] + + def __new__(cls, *args, **kwargs): + mod = __import__(cls._module, fromlist=(cls.__name__,)) + real_cls = getattr(mod, cls.__name__) + instance = real_cls.__new__(real_cls) + instance.__init__(*args, **kwargs) + return instance diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py new file mode 100755 index 000000000..226d1a5d6 --- /dev/null +++ b/devscripts/make_contributing.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import io +import optparse +import re + + +def main(): + parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') + options, args = parser.parse_args() + if len(args) != 2: + parser.error('Expected an input and an output filename') + + infile, outfile = args + + with io.open(infile, encoding='utf-8') as inf: + readme = inf.read() + + bug_text = re.search( + r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1) + dev_text = re.search( + r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING YOUTUBE-DL', + readme).group(1) + + out = bug_text + dev_text + + with io.open(outfile, 'w', encoding='utf-8') as outf: + outf.write(out) + + +if __name__ == '__main__': + main() diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py new file mode 100644 index 000000000..b7ad23d83 --- /dev/null +++ b/devscripts/make_issue_template.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import io +import optparse + + +def main(): + parser = optparse.OptionParser(usage='%prog INFILE OUTFILE') + options, args = parser.parse_args() + if len(args) != 2: + parser.error('Expected an input and an output filename') + + infile, outfile = args + + with io.open(infile, encoding='utf-8') as inf: + issue_template_tmpl = inf.read() + + # Get the version from youtube_dl/version.py without importing the package + exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) + + out = issue_template_tmpl % {'version': locals()['__version__']} + + with io.open(outfile, 'w', encoding='utf-8') as outf: + outf.write(out) + +if __name__ == '__main__': + main() diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py new file mode 100644 index 000000000..0a1762dbc --- /dev/null +++ b/devscripts/make_lazy_extractors.py @@ -0,0 +1,100 @@ +from __future__ import unicode_literals, print_function + +from inspect import getsource +import io +import os +from os.path import dirname as dirn +import sys + 
+print('WARNING: Lazy loading extractors is an experimental feature that may not always work', file=sys.stderr)
+
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+
+lazy_extractors_filename = sys.argv[1]
+if os.path.exists(lazy_extractors_filename):
+    os.remove(lazy_extractors_filename)
+
+from youtube_dl.extractor import _ALL_CLASSES
+from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
+
+with open('devscripts/lazy_load_template.py', 'rt') as f:
+    module_template = f.read()
+
+module_contents = [
+    module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
+    'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
+
+ie_template = '''
+class {name}({bases}):
+    _VALID_URL = {valid_url!r}
+    _module = '{module}'
+'''
+
+make_valid_template = '''
+    @classmethod
+    def _make_valid_url(cls):
+        return {valid_url!r}
+'''
+
+
+def get_base_name(base):
+    if base is InfoExtractor:
+        return 'LazyLoadExtractor'
+    elif base is SearchInfoExtractor:
+        return 'LazyLoadSearchExtractor'
+    else:
+        return base.__name__
+
+
+def build_lazy_ie(ie, name):
+    valid_url = getattr(ie, '_VALID_URL', None)
+    s = ie_template.format(
+        name=name,
+        bases=', '.join(map(get_base_name, ie.__bases__)),
+        valid_url=valid_url,
+        module=ie.__module__)
+    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
+        s += '\n' + getsource(ie.suitable)
+    if hasattr(ie, '_make_valid_url'):
+        # search extractors
+        s += make_valid_template.format(valid_url=ie._make_valid_url())
+    return s
+
+
+# find the correct sorting and add the required base classes so that subclasses
+# can be correctly created
+classes = _ALL_CLASSES[:-1]
+ordered_cls = []
+while classes:
+    for c in classes[:]:
+        bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
+        stop = False
+        for b in bases:
+            if b not in classes and b not in ordered_cls:
+                if b.__name__ == 'GenericIE':
+                    exit()
+                classes.insert(0, b)
+                stop = True
+        if stop:
+            break
+        if all(b in ordered_cls for b in bases):
+            ordered_cls.append(c)
+            classes.remove(c)
+            break
+ordered_cls.append(_ALL_CLASSES[-1])
+
+names = []
+for ie in ordered_cls:
+    name = ie.__name__
+    src = build_lazy_ie(ie, name)
+    module_contents.append(src)
+    if ie in _ALL_CLASSES:
+        names.append(name)
+
+module_contents.append(
+    '_ALL_CLASSES = [{0}]'.format(', '.join(names)))
+
+module_src = '\n'.join(module_contents) + '\n'
+
+with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
+    f.write(module_src)
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
new file mode 100755
index 000000000..8fbce0796
--- /dev/null
+++ b/devscripts/make_readme.py
@@ -0,0 +1,26 @@
+from __future__ import unicode_literals
+
+import io
+import sys
+import re
+
+README_FILE = 'README.md'
+helptext = sys.stdin.read()
+
+if isinstance(helptext, bytes):
+    helptext = helptext.decode('utf-8')
+
+with io.open(README_FILE, encoding='utf-8') as f:
+    oldreadme = f.read()
+
+header = oldreadme[:oldreadme.index('# OPTIONS')]
+footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
+
+options = helptext[helptext.index('  General Options:') + 19:]
+options = re.sub(r'(?m)^  (\w.+)$', r'## \1', options)
+options = '# OPTIONS\n' + options + '\n'
+
+with io.open(README_FILE, 'w', encoding='utf-8') as f:
+    f.write(header)
+    f.write(options)
+    f.write(footer)
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
new file mode 100644
index 000000000..764795bc5
--- /dev/null
+++ b/devscripts/make_supportedsites.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+import io
+import optparse
+import os
+import sys
+
+
+# Import youtube_dl
+ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
+sys.path.insert(0, ROOT_DIR)
+import youtube_dl
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog OUTFILE.md')
+    options, args = parser.parse_args()
+    if len(args) != 1:
+        parser.error('Expected an output filename')
+
+    outfile, = args
+
+    def gen_ies_md(ies):
+        for ie in ies:
+            ie_md = '**{0}**'.format(ie.IE_NAME)
+            ie_desc = getattr(ie, 'IE_DESC', None)
+            if ie_desc is False:
+                continue
+            if ie_desc is not None:
+                ie_md += ': {0}'.format(ie.IE_DESC)
+            if not ie.working():
+                ie_md += ' (Currently broken)'
+            yield ie_md
+
+    ies = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
+    out = '# Supported sites\n' + ''.join(
+        ' - ' + md + '\n'
+        for md in gen_ies_md(ies))
+
+    with io.open(outfile, 'w', encoding='utf-8') as outf:
+        outf.write(out)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/devscripts/posix-locale.sh b/devscripts/posix-locale.sh
new file mode 100755
index 000000000..0aa7a592d
--- /dev/null
+++ b/devscripts/posix-locale.sh
@@ -0,0 +1,6 @@
+
+# source this file in your shell to get a POSIX locale (which will break many programs, but that's kind of the point)
+
+export LC_ALL=POSIX
+export LANG=POSIX
+export LANGUAGE=POSIX
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
new file mode 100644
index 000000000..76bf873e1
--- /dev/null
+++ b/devscripts/prepare_manpage.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import io
+import optparse
+import os.path
+import re
+
+ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+README_FILE = os.path.join(ROOT_DIR, 'README.md')
+
+PREFIX = r'''%YOUTUBE-DL(1)
+
+# NAME
+
+youtube\-dl \- download videos from youtube.com or other video platforms
+
+# SYNOPSIS
+
+**youtube-dl** \[OPTIONS\] URL [URL...]
+
+'''
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog OUTFILE.md')
+    options, args = parser.parse_args()
+    if len(args) != 1:
+        parser.error('Expected an output filename')
+
+    outfile, = args
+
+    with io.open(README_FILE, encoding='utf-8') as f:
+        readme = f.read()
+
+    readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
+    readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
+    readme = PREFIX + readme
+
+    readme = filter_options(readme)
+
+    with io.open(outfile, 'w', encoding='utf-8') as outf:
+        outf.write(readme)
+
+
+def filter_options(readme):
+    ret = ''
+    in_options = False
+    for line in readme.split('\n'):
+        if line.startswith('# '):
+            if line[2:].startswith('OPTIONS'):
+                in_options = True
+            else:
+                in_options = False
+
+        if in_options:
+            if line.lstrip().startswith('-'):
+                split = re.split(r'\s{2,}', line.lstrip())
+                # Description string may start with `-` as well. If there is
+                # only one piece then it's a description, not an option.
+                if len(split) > 1:
+                    option, description = split
+                    split_option = option.split(' ')
+
+                    if not split_option[-1].startswith('-'):  # metavar
+                        option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
+
+                    # Pandoc's definition_lists. See http://pandoc.org/README.html
+                    # for more information.
+ ret += '\n%s\n: %s\n' % (option, description) + continue + ret += line.lstrip() + '\n' + else: + ret += line + '\n' + + return ret + + +if __name__ == '__main__': + main() diff --git a/devscripts/release.sh b/devscripts/release.sh new file mode 100755 index 000000000..f2411c927 --- /dev/null +++ b/devscripts/release.sh @@ -0,0 +1,141 @@ +#!/bin/bash + +# IMPORTANT: the following assumptions are made +# * the GH repo is on the origin remote +# * the gh-pages branch is named so locally +# * the git config user.signingkey is properly set + +# You will need +# pip install coverage nose rsa wheel + +# TODO +# release notes +# make hash on local files + +set -e + +skip_tests=true +gpg_sign_commits="" +buildserver='localhost:8142' + +while true +do +case "$1" in + --run-tests) + skip_tests=false + shift + ;; + --gpg-sign-commits|-S) + gpg_sign_commits="-S" + shift + ;; + --buildserver) + buildserver="$2" + shift 2 + ;; + --*) + echo "ERROR: unknown option $1" + exit 1 + ;; + *) + break + ;; +esac +done + +if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi +version="$1" +major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p') +if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then + echo "$version does not start with today's date!" + exit 1 +fi + +if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi +if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi +useless_files=$(find youtube_dl -type f -not -name '*.py') +if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi +if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi +if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi +if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi +if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi + +read -p "Is ChangeLog up to date? (y/n) " -n 1 +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi + +/bin/echo -e "\n### First of all, testing..." +make clean +if $skip_tests ; then + echo 'SKIPPING TESTS' +else + nosetests --verbose --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1 +fi + +/bin/echo -e "\n### Changing version in version.py..." +sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py + +/bin/echo -e "\n### Changing version in ChangeLog..." +sed -i "s//$version/" ChangeLog + +/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." +make README.md CONTRIBUTING.md issuetemplates supportedsites +git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md youtube_dl/version.py ChangeLog +git commit $gpg_sign_commits -m "release $version" + +/bin/echo -e "\n### Now tagging, signing and pushing..." +git tag -s -m "Release $version" "$version" +git show "$version" +read -p "Is it good, can I push? (y/n) " -n 1 +if [[ ! 
+if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
+echo
+MASTER=$(git rev-parse --abbrev-ref HEAD)
+git push origin $MASTER:master
+git push origin "$version"
+
+/bin/echo -e "\n### OK, now it is time to build the binaries..."
+REV=$(git rev-parse HEAD)
+make youtube-dl youtube-dl.tar.gz
+read -p "VM running? (y/n) " -n 1
+wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+mkdir -p "build/$version"
+mv youtube-dl youtube-dl.exe "build/$version"
+mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
+RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
+(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
+(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
+(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
+(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
+
+/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
+for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
+
+ROOT=$(pwd)
+python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
+
+ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
+
+/bin/echo -e "\n### Now switching to gh-pages..."
+git clone --branch gh-pages --single-branch . build/gh-pages
+(
+    set -e
+    ORIGIN_URL=$(git config --get remote.origin.url)
+    cd build/gh-pages
+    "$ROOT/devscripts/gh-pages/add-version.py" $version
+    "$ROOT/devscripts/gh-pages/update-feed.py"
+    "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
+    "$ROOT/devscripts/gh-pages/generate-download.py"
+    "$ROOT/devscripts/gh-pages/update-copyright.py"
+    "$ROOT/devscripts/gh-pages/update-sites.py"
+    git add *.html *.html.in update
+    git commit $gpg_sign_commits -m "release $version"
+    git push "$ROOT" gh-pages
+    git push "$ORIGIN_URL" gh-pages
+)
+rm -rf build
+
+make pypi-files
+echo "Uploading to PyPI ..."
+python setup.py sdist bdist_wheel upload
+make clean
+
+/bin/echo -e "\n### DONE!"
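release.sh records SHA-256 digests of every artifact in SHA2-256SUMS and writes a detached GPG signature next to each file. A minimal consumer-side sketch of the matching verification, assuming the coreutils-style sums file and the FILE.sig naming produced above (the helper name is made up; it is not part of this patch):

    #!/usr/bin/env python
    # Illustrative check of a downloaded release artifact against the
    # SHA2-256SUMS file and its detached signature (both written by release.sh).
    from __future__ import unicode_literals

    import hashlib
    import subprocess


    def verify_release_file(filename, sums_file='SHA2-256SUMS'):
        # coreutils format: one "<hexdigest>  <filename>" pair per line
        with open(sums_file) as f:
            sums = dict(line.split()[::-1] for line in f if line.strip())
        with open(filename, 'rb') as f:
            digest = hashlib.sha256(f.read()).hexdigest()
        if sums.get(filename) != digest:
            raise ValueError('checksum mismatch for %s' % filename)
        # gpg --verify exits non-zero on a bad signature, which
        # check_call turns into a CalledProcessError
        subprocess.check_call(['gpg', '--verify', filename + '.sig', filename])


    verify_release_file('youtube-dl')

Using check_call means a bad signature aborts loudly instead of being silently ignored.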
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh new file mode 100755 index 000000000..dd37a80f5 --- /dev/null +++ b/devscripts/run_tests.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Keep this list in sync with the `offlinetest` target in Makefile +DOWNLOAD_TESTS="age_restriction|download|iqiyi_sdk_interpreter|socks|subtitles|write_annotations|youtube_lists|youtube_signature" + +test_set="" +multiprocess_args="" + +case "$YTDL_TEST_SET" in + core) + test_set="-I test_($DOWNLOAD_TESTS)\.py" + ;; + download) + test_set="-I test_(?!$DOWNLOAD_TESTS).+\.py" + multiprocess_args="--processes=4 --process-timeout=540" + ;; + *) + break + ;; +esac + +nosetests test --verbose $test_set $multiprocess_args diff --git a/devscripts/show-downloads-statistics.py b/devscripts/show-downloads-statistics.py new file mode 100644 index 000000000..6c8d1cc2d --- /dev/null +++ b/devscripts/show-downloads-statistics.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import itertools +import json +import os +import re +import sys + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.compat import ( + compat_print, + compat_urllib_request, +) +from youtube_dl.utils import format_bytes + + +def format_size(bytes): + return '%s (%d bytes)' % (format_bytes(bytes), bytes) + + +total_bytes = 0 + +for page in itertools.count(1): + releases = json.loads(compat_urllib_request.urlopen( + 'https://api.github.com/repos/ytdl-org/youtube-dl/releases?page=%s' % page + ).read().decode('utf-8')) + + if not releases: + break + + for release in releases: + compat_print(release['name']) + for asset in release['assets']: + asset_name = asset['name'] + total_bytes += asset['download_count'] * asset['size'] + if all(not re.match(p, asset_name) for p in ( + r'^youtube-dl$', + r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$', + r'^youtube-dl\.exe$')): + continue + compat_print( + ' %s size: %s downloads: %d' + % (asset_name, format_size(asset['size']), asset['download_count'])) + +compat_print('total downloads traffic: %s' % format_size(total_bytes)) diff --git a/devscripts/wine-py2exe.sh b/devscripts/wine-py2exe.sh new file mode 100755 index 000000000..dc2d6501a --- /dev/null +++ b/devscripts/wine-py2exe.sh @@ -0,0 +1,56 @@ +#!/bin/bash + +# Run with as parameter a setup.py that works in the current directory +# e.g. no os.chdir() +# It will run twice, the first time will crash + +set -e + +SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" + +if [ ! 
-d wine-py2exe ]; then + + sudo apt-get install wine1.3 axel bsdiff + + mkdir wine-py2exe + cd wine-py2exe + export WINEPREFIX=`pwd` + + axel -a "http://www.python.org/ftp/python/2.7/python-2.7.msi" + axel -a "http://downloads.sourceforge.net/project/py2exe/py2exe/0.6.9/py2exe-0.6.9.win32-py2.7.exe" + #axel -a "http://winetricks.org/winetricks" + + # http://appdb.winehq.org/objectManager.php?sClass=version&iId=21957 + echo "Follow python setup on screen" + wine msiexec /i python-2.7.msi + + echo "Follow py2exe setup on screen" + wine py2exe-0.6.9.win32-py2.7.exe + + #echo "Follow Microsoft Visual C++ 2008 Redistributable Package setup on screen" + #bash winetricks vcrun2008 + + rm py2exe-0.6.9.win32-py2.7.exe + rm python-2.7.msi + #rm winetricks + + # http://bugs.winehq.org/show_bug.cgi?id=3591 + + mv drive_c/Python27/Lib/site-packages/py2exe/run.exe drive_c/Python27/Lib/site-packages/py2exe/run.exe.backup + bspatch drive_c/Python27/Lib/site-packages/py2exe/run.exe.backup drive_c/Python27/Lib/site-packages/py2exe/run.exe "$SCRIPT_DIR/SizeOfImage.patch" + mv drive_c/Python27/Lib/site-packages/py2exe/run_w.exe drive_c/Python27/Lib/site-packages/py2exe/run_w.exe.backup + bspatch drive_c/Python27/Lib/site-packages/py2exe/run_w.exe.backup drive_c/Python27/Lib/site-packages/py2exe/run_w.exe "$SCRIPT_DIR/SizeOfImage_w.patch" + + cd - + +else + + export WINEPREFIX="$( cd wine-py2exe && pwd )" + +fi + +wine "C:\\Python27\\python.exe" "$1" py2exe > "py2exe.log" 2>&1 || true +echo '# Copying python27.dll' >> "py2exe.log" +cp "$WINEPREFIX/drive_c/windows/system32/python27.dll" build/bdist.win32/winexe/bundle-2.7/ +wine "C:\\Python27\\python.exe" "$1" py2exe >> "py2exe.log" 2>&1 + diff --git a/devscripts/zsh-completion.in b/devscripts/zsh-completion.in new file mode 100644 index 000000000..b394a1ae7 --- /dev/null +++ b/devscripts/zsh-completion.in @@ -0,0 +1,28 @@ +#compdef youtube-dl + +__youtube_dl() { + local curcontext="$curcontext" fileopts diropts cur prev + typeset -A opt_args + fileopts="{{fileopts}}" + diropts="{{diropts}}" + cur=$words[CURRENT] + case $cur in + :) + _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)' + ;; + *) + prev=$words[CURRENT-1] + if [[ ${prev} =~ ${fileopts} ]]; then + _path_files + elif [[ ${prev} =~ ${diropts} ]]; then + _path_files -/ + elif [[ ${prev} == "--recode-video" ]]; then + _arguments '*: :(mp4 flv ogg webm mkv)' + else + _arguments '*: :({{flags}})' + fi + ;; + esac +} + +__youtube_dl \ No newline at end of file diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py new file mode 100755 index 000000000..60aaf76cc --- /dev/null +++ b/devscripts/zsh-completion.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +import os +from os.path import dirname as dirn +import sys + +sys.path.insert(0, dirn(dirn((os.path.abspath(__file__))))) +import youtube_dl + +ZSH_COMPLETION_FILE = "youtube-dl.zsh" +ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in" + + +def build_completion(opt_parser): + opts = [opt for group in opt_parser.option_groups + for opt in group.option_list] + opts_file = [opt for opt in opts if opt.metavar == "FILE"] + opts_dir = [opt for opt in opts if opt.metavar == "DIR"] + + fileopts = [] + for opt in opts_file: + if opt._short_opts: + fileopts.extend(opt._short_opts) + if opt._long_opts: + fileopts.extend(opt._long_opts) + + diropts = [] + for opt in opts_dir: + if opt._short_opts: + diropts.extend(opt._short_opts) + if opt._long_opts: + 
+            diropts.extend(opt._long_opts)
+
+    flags = [opt.get_opt_string() for opt in opts]
+
+    with open(ZSH_COMPLETION_TEMPLATE) as f:
+        template = f.read()
+
+    template = template.replace("{{fileopts}}", "|".join(fileopts))
+    template = template.replace("{{diropts}}", "|".join(diropts))
+    template = template.replace("{{flags}}", " ".join(flags))
+
+    with open(ZSH_COMPLETION_FILE, "w") as f:
+        f.write(template)
+
+
+parser = youtube_dl.parseOpts()[0]
+build_completion(parser)
diff --git a/docs/.gitignore b/docs/.gitignore
new file mode 100644
index 000000000..69fa449dd
--- /dev/null
+++ b/docs/.gitignore
@@ -0,0 +1 @@
+_build/
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 000000000..712218045
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,177 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = _build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  xml        to make Docutils-native XML files"
+	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	rm -rf $(BUILDDIR)/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+ +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dl.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dl.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dl" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dl" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." 
+ +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..0aaf1b8fc --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,71 @@ +# coding: utf-8 +# +# youtube-dl documentation build configuration file, created by +# sphinx-quickstart on Fri Mar 14 21:05:43 2014. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os +# Allows to import youtube_dl +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# -- General configuration ------------------------------------------------ + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'youtube-dl' +copyright = u'2014, Ricardo Garcia Gonzalez' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +from youtube_dl.version import __version__ +version = __version__ +# The full version, including alpha/beta/rc tags. +release = version + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Output file base name for HTML help builder. +htmlhelp_basename = 'youtube-dldoc' diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..b746ff95b --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,23 @@ +Welcome to youtube-dl's documentation! +====================================== + +*youtube-dl* is a command-line program to download videos from YouTube.com and more sites. +It can also be used in Python code. + +Developer guide +--------------- + +This section contains information for using *youtube-dl* from Python programs. + +.. 
toctree:: + :maxdepth: 2 + + module_guide + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/module_guide.rst b/docs/module_guide.rst new file mode 100644 index 000000000..03d72882e --- /dev/null +++ b/docs/module_guide.rst @@ -0,0 +1,67 @@ +Using the ``youtube_dl`` module +=============================== + +When using the ``youtube_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors: + +.. code-block:: python + + >>> from youtube_dl import YoutubeDL + >>> ydl = YoutubeDL() + >>> ydl.add_default_info_extractors() + +Extracting video information +---------------------------- + +You use the :meth:`YoutubeDL.extract_info` method for getting the video information, which returns a dictionary: + +.. code-block:: python + + >>> info = ydl.extract_info('http://www.youtube.com/watch?v=BaW_jenozKc', download=False) + [youtube] Setting language + [youtube] BaW_jenozKc: Downloading webpage + [youtube] BaW_jenozKc: Downloading video info webpage + [youtube] BaW_jenozKc: Extracting video information + >>> info['title'] + 'youtube-dl test video "\'/\\ä↭𝕐' + >>> info['height'], info['width'] + (720, 1280) + +If you want to download or play the video you can get its url: + +.. code-block:: python + + >>> info['url'] + 'https://...' + +Extracting playlist information +------------------------------- + +The playlist information is extracted in a similar way, but the dictionary is a bit different: + +.. code-block:: python + + >>> playlist = ydl.extract_info('http://www.ted.com/playlists/13/open_source_open_world', download=False) + [TED] open_source_open_world: Downloading playlist webpage + ... + >>> playlist['title'] + 'Open-source, open world' + + + +You can access the videos in the playlist with the ``entries`` field: + +.. code-block:: python + + >>> for video in playlist['entries']: + ... 
print('Video #%d: %s' % (video['playlist_index'], video['title'])) + + Video #1: How Arduino is open-sourcing imagination + Video #2: The year open data went worldwide + Video #3: Massive-scale online collaboration + Video #4: The art of asking + Video #5: How cognitive surplus will change the world + Video #6: The birth of Wikipedia + Video #7: Coding a better government + Video #8: The era of open innovation + Video #9: The currency of the new economy is trust + diff --git a/docs/supportedsites.md b/docs/supportedsites.md new file mode 100644 index 000000000..55ae43144 --- /dev/null +++ b/docs/supportedsites.md @@ -0,0 +1,1169 @@ +# Supported sites + - **1tv**: Первый канал + - **1up.com** + - **20min** + - **220.ro** + - **23video** + - **24video** + - **3qsdn**: 3Q SDN + - **3sat** + - **4tube** + - **56.com** + - **5min** + - **6play** + - **7plus** + - **8tracks** + - **91porn** + - **9c9media** + - **9gag** + - **9now.com.au** + - **abc.net.au** + - **abc.net.au:iview** + - **abcnews** + - **abcnews:video** + - **abcotvs**: ABC Owned Television Stations + - **abcotvs:clips** + - **AcademicEarth:Course** + - **acast** + - **acast:channel** + - **AddAnime** + - **ADN**: Anime Digital Network + - **AdobeConnect** + - **AdobeTV** + - **AdobeTVChannel** + - **AdobeTVShow** + - **AdobeTVVideo** + - **AdultSwim** + - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault + - **afreecatv**: afreecatv.com + - **AirMozilla** + - **AliExpressLive** + - **AlJazeera** + - **Allocine** + - **AlphaPorno** + - **AMCNetworks** + - **AmericasTestKitchen** + - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **AnimeOnDemand** + - **Anvato** + - **aol.com** + - **APA** + - **Aparat** + - **AppleConnect** + - **AppleDaily**: 臺灣蘋果日報 + - **appletrailers** + - **appletrailers:section** + - **archive.org**: archive.org videos + - **ARD** + - **ARD:mediathek** + - **ARDBetaMediathek** + - **Arkena** + - **arte.tv** + - **arte.tv:+7** + - **arte.tv:cinema** + - **arte.tv:concert** + - **arte.tv:creative** + - **arte.tv:ddc** + - **arte.tv:embed** + - **arte.tv:future** + - **arte.tv:info** + - **arte.tv:magazine** + - **arte.tv:playlist** + - **AsianCrush** + - **AsianCrushPlaylist** + - **AtresPlayer** + - **ATTTechChannel** + - **ATVAt** + - **AudiMedia** + - **AudioBoom** + - **audiomack** + - **audiomack:album** + - **AWAAN** + - **awaan:live** + - **awaan:season** + - **awaan:video** + - **AZMedien**: AZ Medien videos + - **BaiduVideo**: 百度视频 + - **bambuser** + - **bambuser:channel** + - **Bandcamp** + - **Bandcamp:album** + - **Bandcamp:weekly** + - **bangumi.bilibili.com**: BiliBili番剧 + - **bbc**: BBC + - **bbc.co.uk**: BBC iPlayer + - **bbc.co.uk:article**: BBC articles + - **bbc.co.uk:iplayer:playlist** + - **bbc.co.uk:playlist** + - **BBVTV** + - **Beatport** + - **Beeg** + - **BehindKink** + - **Bellator** + - **BellMedia** + - **Bet** + - **bfi:player** + - **Bigflix** + - **Bild**: Bild.de + - **BiliBili** + - **BioBioChileTV** + - **BIQLE** + - **BitChute** + - **BitChuteChannel** + - **BleacherReport** + - **BleacherReportCMS** + - **blinkx** + - **Bloomberg** + - **BokeCC** + - **BostonGlobe** + - **Bpb**: Bundeszentrale für politische Bildung + - **BR**: Bayerischer Rundfunk + - **BravoTV** + - **Break** + - **brightcove:legacy** + - **brightcove:new** + - **BRMediathek**: Bayerischer Rundfunk Mediathek + - **bt:article**: Bergens Tidende Articles + - **bt:vestlendingen**: Bergens Tidende - Vestlendingen + - **BusinessInsider** + - 
**BuzzFeed** + - **BYUtv** + - **Camdemy** + - **CamdemyFolder** + - **CamModels** + - **CamTube** + - **CamWithHer** + - **canalc2.tv** + - **Canalplus**: mycanal.fr and piwiplus.fr + - **Canvas** + - **CanvasEen**: canvas.be and een.be + - **CarambaTV** + - **CarambaTVPage** + - **CartoonNetwork** + - **cbc.ca** + - **cbc.ca:olympics** + - **cbc.ca:player** + - **cbc.ca:watch** + - **cbc.ca:watch:video** + - **CBS** + - **CBSInteractive** + - **CBSLocal** + - **cbsnews**: CBS News + - **cbsnews:embed** + - **cbsnews:livevideo**: CBS News Live Videos + - **CBSSports** + - **CCMA** + - **CCTV**: 央视网 + - **CDA** + - **CeskaTelevize** + - **CeskaTelevizePorady** + - **channel9**: Channel 9 + - **CharlieRose** + - **Chaturbate** + - **Chilloutzone** + - **chirbit** + - **chirbit:profile** + - **Cinchcast** + - **Cinemax** + - **CiscoLiveSearch** + - **CiscoLiveSession** + - **CJSW** + - **cliphunter** + - **Clippit** + - **ClipRs** + - **Clipsyndicate** + - **CloserToTruth** + - **CloudflareStream** + - **Cloudy** + - **Clubic** + - **Clyp** + - **cmt.com** + - **CNBC** + - **CNBCVideo** + - **CNN** + - **CNNArticle** + - **CNNBlogs** + - **ComCarCoff** + - **ComedyCentral** + - **ComedyCentralFullEpisodes** + - **ComedyCentralShortname** + - **ComedyCentralTV** + - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED + - **Corus** + - **Coub** + - **Cracked** + - **Crackle** + - **CrooksAndLiars** + - **crunchyroll** + - **crunchyroll:playlist** + - **CSNNE** + - **CSpan**: C-SPAN + - **CtsNews**: 華視新聞 + - **CTVNews** + - **cu.ntv.co.jp**: Nippon Television Network + - **Culturebox** + - **CultureUnplugged** + - **curiositystream** + - **curiositystream:collection** + - **CWTV** + - **DailyMail** + - **dailymotion** + - **dailymotion:playlist** + - **dailymotion:user** + - **DaisukiMotto** + - **DaisukiMottoPlaylist** + - **daum.net** + - **daum.net:clip** + - **daum.net:playlist** + - **daum.net:user** + - **DBTV** + - **DctpTv** + - **DeezerPlaylist** + - **defense.gouv.fr** + - **democracynow** + - **DHM**: Filmarchiv - Deutsches Historisches Museum + - **Digg** + - **DigitallySpeaking** + - **Digiteka** + - **Discovery** + - **DiscoveryGo** + - **DiscoveryGoPlaylist** + - **DiscoveryNetworksDe** + - **DiscoveryVR** + - **Disney** + - **Dotsub** + - **DouyuShow** + - **DouyuTV**: 斗鱼 + - **DPlay** + - **DPlayIt** + - **DRBonanza** + - **Dropbox** + - **DrTuber** + - **drtv** + - **drtv:live** + - **DTube** + - **Dumpert** + - **dvtv**: http://video.aktualne.cz/ + - **dw** + - **dw:article** + - **EaglePlatform** + - **EbaumsWorld** + - **EchoMsk** + - **egghead:course**: egghead.io course + - **egghead:lesson**: egghead.io lesson + - **ehftv** + - **eHow** + - **EinsUndEinsTV** + - **Einthusan** + - **eitb.tv** + - **EllenTube** + - **EllenTubePlaylist** + - **EllenTubeVideo** + - **ElPais**: El País + - **Embedly** + - **EMPFlix** + - **Engadget** + - **Eporner** + - **EroProfile** + - **Escapist** + - **ESPN** + - **ESPNArticle** + - **EsriVideo** + - **Europa** + - **EveryonesMixtape** + - **EWETV** + - **ExpoTV** + - **Expressen** + - **ExtremeTube** + - **EyedoTV** + - **facebook** + - **FacebookPluginsVideo** + - **faz.net** + - **fc2** + - **fc2:embed** + - **Fczenit** + - **filmon** + - **filmon:channel** + - **Filmweb** + - **FiveThirtyEight** + - **FiveTV** + - **Flickr** + - 
**Flipagram** + - **Folketinget**: Folketinget (ft.dk; Danish parliament) + - **FootyRoom** + - **Formula1** + - **FOX** + - **FOX9** + - **Foxgay** + - **foxnews**: Fox News and Fox Business Video + - **foxnews:article** + - **FoxSports** + - **france2.fr:generation-what** + - **FranceCulture** + - **FranceInter** + - **FranceTV** + - **FranceTVEmbed** + - **francetvinfo.fr** + - **FranceTVJeunesse** + - **FranceTVSite** + - **Freesound** + - **freespeech.org** + - **FreshLive** + - **FrontendMasters** + - **FrontendMastersCourse** + - **FrontendMastersLesson** + - **Funimation** + - **FunkChannel** + - **FunkMix** + - **FunnyOrDie** + - **Fusion** + - **Fux** + - **FXNetworks** + - **Gaia** + - **GameInformer** + - **GameOne** + - **gameone:playlist** + - **GameSpot** + - **GameStar** + - **Gaskrank** + - **Gazeta** + - **GDCVault** + - **generic**: Generic downloader that works on some sites + - **Gfycat** + - **GiantBomb** + - **Giga** + - **GlattvisionTV** + - **Glide**: Glide mobile video messages (glide.me) + - **Globo** + - **GloboArticle** + - **Go** + - **Go90** + - **GodTube** + - **Golem** + - **GoogleDrive** + - **Goshgay** + - **GPUTechConf** + - **Groupon** + - **Hark** + - **hbo** + - **HearThisAt** + - **Heise** + - **HellPorno** + - **Helsinki**: helsinki.fi + - **HentaiStigma** + - **hetklokhuis** + - **hgtv.com:show** + - **HiDive** + - **HistoricFilms** + - **history:topic**: History.com Topic + - **hitbox** + - **hitbox:live** + - **HitRecord** + - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau + - **HornBunny** + - **HotNewHipHop** + - **hotstar** + - **hotstar:playlist** + - **Howcast** + - **HowStuffWorks** + - **HRTi** + - **HRTiPlaylist** + - **Huajiao**: 花椒直播 + - **HuffPost**: Huffington Post + - **Hungama** + - **HungamaSong** + - **Hypem** + - **Iconosquare** + - **ign.com** + - **imdb**: Internet Movie Database trailers + - **imdb:list**: Internet Movie Database lists + - **Imgur** + - **imgur:album** + - **imgur:gallery** + - **Ina** + - **Inc** + - **IndavideoEmbed** + - **InfoQ** + - **Instagram** + - **instagram:tag**: Instagram hashtag search + - **instagram:user**: Instagram user profile + - **Internazionale** + - **InternetVideoArchive** + - **IPrima** + - **iqiyi**: 爱奇艺 + - **Ir90Tv** + - **ITTF** + - **ITV** + - **ITVBTCC** + - **ivi**: ivi.ru + - **ivi:compilation**: ivi.ru compilations + - **ivideon**: Ivideon TV + - **Iwara** + - **Izlesene** + - **Jamendo** + - **JamendoAlbum** + - **JeuxVideo** + - **Joj** + - **Jove** + - **jpopsuki.tv** + - **JWPlatform** + - **Kakao** + - **Kaltura** + - **KanalPlay**: Kanal 5/9/11 Play + - **Kankan** + - **Karaoketv** + - **KarriereVideos** + - **keek** + - **KeezMovies** + - **Ketnet** + - **KhanAcademy** + - **KickStarter** + - **KinoPoisk** + - **KonserthusetPlay** + - **kontrtube**: KontrTube.ru - Труба зовёт + - **KrasView**: Красвью + - **Ku6** + - **KUSI** + - **kuwo:album**: 酷我音乐 - 专辑 + - **kuwo:category**: 酷我音乐 - 分类 + - **kuwo:chart**: 酷我音乐 - 排行榜 + - **kuwo:mv**: 酷我音乐 - MV + - **kuwo:singer**: 酷我音乐 - 歌手 + - **kuwo:song**: 酷我音乐 + - **la7.it** + - **laola1tv** + - **laola1tv:embed** + - **LCI** + - **Lcp** + - **LcpPlay** + - **Le**: 乐视网 + - **Learnr** + - **Lecture2Go** + - **Lecturio** + - **LecturioCourse** + - **LecturioDeCourse** + - **LEGO** + - **Lemonde** + - **Lenta** + - **LePlaylist** + - **LetvCloud**: 乐视云 + - **Libsyn** + - **life**: Life.ru + - **life:embed** + - **limelight** + - **limelight:channel** + - **limelight:channel_list** + - **LineTV** + - 
**linkedin:learning** + - **linkedin:learning:course** + - **LinuxAcademy** + - **LiTV** + - **LiveLeak** + - **LiveLeakEmbed** + - **livestream** + - **livestream:original** + - **LnkGo** + - **loc**: Library of Congress + - **LocalNews8** + - **LoveHomePorn** + - **lrt.lt** + - **lynda**: lynda.com videos + - **lynda:course**: lynda.com online courses + - **m6** + - **macgamestore**: MacGameStore trailers + - **mailru**: Видео@Mail.Ru + - **mailru:music**: Музыка@Mail.Ru + - **mailru:music:search**: Музыка@Mail.Ru + - **MakerTV** + - **MallTV** + - **mangomolo:live** + - **mangomolo:video** + - **ManyVids** + - **Markiza** + - **MarkizaPage** + - **massengeschmack.tv** + - **MatchTV** + - **MDR**: MDR.DE and KiKA + - **media.ccc.de** + - **media.ccc.de:lists** + - **Medialaan** + - **Mediaset** + - **Mediasite** + - **MediasiteCatalog** + - **MediasiteNamedCatalog** + - **Medici** + - **megaphone.fm**: megaphone.fm embedded players + - **Meipai**: 美拍 + - **MelonVOD** + - **META** + - **metacafe** + - **Metacritic** + - **Mgoon** + - **MGTV**: 芒果TV + - **MiaoPai** + - **Minhateca** + - **MinistryGrid** + - **Minoto** + - **miomio.tv** + - **MiTele**: mitele.es + - **mixcloud** + - **mixcloud:playlist** + - **mixcloud:stream** + - **mixcloud:user** + - **Mixer:live** + - **Mixer:vod** + - **MLB** + - **Mnet** + - **MNetTV** + - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net + - **Mofosex** + - **Mojvideo** + - **Morningstar**: morningstar.com + - **Motherless** + - **MotherlessGroup** + - **Motorsport**: motorsport.com + - **MovieClips** + - **MovieFap** + - **Moviezine** + - **MovingImage** + - **MSN** + - **mtg**: MTG services + - **mtv** + - **mtv.de** + - **mtv81** + - **mtv:video** + - **mtvservices:embedded** + - **MuenchenTV**: münchen.tv + - **MusicPlayOn** + - **mva**: Microsoft Virtual Academy videos + - **mva:course**: Microsoft Virtual Academy courses + - **Mwave** + - **MwaveMeetGreet** + - **MyChannels** + - **MySpace** + - **MySpace:album** + - **MySpass** + - **Myvi** + - **MyVidster** + - **MyviEmbed** + - **MyVisionTV** + - **n-tv.de** + - **natgeo:video** + - **NationalGeographicTV** + - **Naver** + - **NBA** + - **NBC** + - **NBCNews** + - **nbcolympics** + - **nbcolympics:stream** + - **NBCSports** + - **NBCSportsStream** + - **NBCSportsVPlayer** + - **ndr**: NDR.de - Norddeutscher Rundfunk + - **ndr:embed** + - **ndr:embed:base** + - **NDTV** + - **NerdCubedFeed** + - **netease:album**: 网易云音乐 - 专辑 + - **netease:djradio**: 网易云音乐 - 电台 + - **netease:mv**: 网易云音乐 - MV + - **netease:playlist**: 网易云音乐 - 歌单 + - **netease:program**: 网易云音乐 - 电台节目 + - **netease:singer**: 网易云音乐 - 歌手 + - **netease:song**: 网易云音乐 + - **NetPlus** + - **Netzkino** + - **Newgrounds** + - **NewgroundsPlaylist** + - **Newstube** + - **NextMedia**: 蘋果日報 + - **NextMediaActionNews**: 蘋果日報 - 動新聞 + - **NextTV**: 壹電視 + - **Nexx** + - **NexxEmbed** + - **nfl.com** + - **NhkVod** + - **nhl.com** + - **nick.com** + - **nick.de** + - **nickelodeon:br** + - **nickelodeonru** + - **nicknight** + - **niconico**: ニコニコ動画 + - **NiconicoPlaylist** + - **Nintendo** + - **njoy**: N-JOY + - **njoy:embed** + - **NJPWWorld**: 新日本プロレスワールド + - **NobelPrize** + - **Noco** + - **NonkTube** + - **Noovo** + - **Normalboots** + - **NosVideo** + - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz + - **NovaEmbed** + - **nowness** + - **nowness:playlist** + - **nowness:series** + - **Noz** + - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - 
**npo.nl:live** + - **npo.nl:radio** + - **npo.nl:radio:fragment** + - **Npr** + - **NRK** + - **NRKPlaylist** + - **NRKSkole**: NRK Skole + - **NRKTV**: NRK TV and NRK Radio + - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte + - **NRKTVEpisode** + - **NRKTVEpisodes** + - **NRKTVSeason** + - **NRKTVSeries** + - **NRLTV** + - **ntv.ru** + - **Nuvid** + - **NYTimes** + - **NYTimesArticle** + - **NZZ** + - **ocw.mit.edu** + - **OdaTV** + - **Odnoklassniki** + - **OktoberfestTV** + - **OnDemandKorea** + - **onet.pl** + - **onet.tv** + - **onet.tv:channel** + - **OnetMVP** + - **OnionStudios** + - **Ooyala** + - **OoyalaExternal** + - **Openload** + - **OraTV** + - **orf:fm4**: radio FM4 + - **orf:fm4:story**: fm4.orf.at stories + - **orf:iptv**: iptv.ORF.at + - **orf:oe1**: Radio Österreich 1 + - **orf:tvthek**: ORF TVthek + - **OsnatelTV** + - **OutsideTV** + - **PacktPub** + - **PacktPubCourse** + - **PandaTV**: 熊猫TV + - **pandora.tv**: 판도라TV + - **ParamountNetwork** + - **parliamentlive.tv**: UK parliament videos + - **Patreon** + - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! 
(WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC) + - **pcmag** + - **PearVideo** + - **PeerTube** + - **People** + - **PerformGroup** + - **periscope**: Periscope + - **periscope:user**: Periscope user videos + - **PhilharmonieDeParis**: Philharmonie de Paris + - **phoenix.de** + - **Photobucket** + - **Picarto** + - **PicartoVod** + - **Piksel** + - **Pinkbike** + - **Pladform** + - **Platzi** + - **PlatziCourse** + - **play.fm** + - **PlayPlusTV** + - **PlaysTV** + - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz + - **Playvid** + - **Playwire** + - **pluralsight** + - **pluralsight:course** + - **plus.google**: Google Plus + - **podomatic** + - **Pokemon** + - **PolskieRadio** + - **PolskieRadioCategory** + - **PopcornTV** + - **PornCom** + - **PornerBros** + - **PornHd** + - **PornHub**: PornHub and Thumbzilla + - **PornHubPagedVideoList** + - **PornHubUser** + - **PornHubUserVideosUpload** + - **Pornotube** + - **PornoVoisines** + - **PornoXO** + - **PornTube** + - **PressTV** + - **PromptFile** + - **prosiebensat1**: ProSiebenSat.1 Digital + - **puhutv** + - **puhutv:serie** + - **Puls4** + - **Pyvideo** + - **qqmusic**: QQ音乐 + - **qqmusic:album**: QQ音乐 - 专辑 + - **qqmusic:playlist**: QQ音乐 - 歌单 + - **qqmusic:singer**: QQ音乐 - 歌手 + - **qqmusic:toplist**: QQ音乐 - 排行榜 + - **QuantumTV** + - **Quickline** + - **QuicklineLive** + - **R7** + - **R7Article** + - **radio.de** + - **radiobremen** + - **radiocanada** + - **radiocanada:audiovideo** + - **radiofrance** + - **RadioJavan** + - **Rai** + - **RaiPlay** + - **RaiPlayLive** + - **RaiPlayPlaylist** + - **RayWenderlich** + - **RayWenderlichCourse** + - 
**RBMARadio** + - **RDS**: RDS.ca + - **RedBullTV** + - **RedBullTVRrnContent** + - **Reddit** + - **RedditR** + - **RedTube** + - **RegioTV** + - **RENTV** + - **RENTVArticle** + - **Restudy** + - **Reuters** + - **ReverbNation** + - **revision** + - **revision3:embed** + - **RICE** + - **RMCDecouverte** + - **RockstarGames** + - **RoosterTeeth** + - **RottenTomatoes** + - **Roxwel** + - **Rozhlas** + - **RTBF** + - **rte**: Raidió Teilifís Éireann TV + - **rte:radio**: Raidió Teilifís Éireann radio + - **rtl.nl**: rtl.nl and rtlxl.nl + - **rtl2** + - **rtl2:you** + - **rtl2:you:series** + - **RTP** + - **RTS**: RTS.ch + - **rtve.es:alacarta**: RTVE a la carta + - **rtve.es:infantil**: RTVE infantil + - **rtve.es:live**: RTVE.es live streams + - **rtve.es:television** + - **RTVNH** + - **RTVS** + - **Rudo** + - **RUHD** + - **rutube**: Rutube videos + - **rutube:channel**: Rutube channels + - **rutube:embed**: Rutube embedded videos + - **rutube:movie**: Rutube movies + - **rutube:person**: Rutube person videos + - **rutube:playlist**: Rutube playlists + - **RUTV**: RUTV.RU + - **Ruutu** + - **Ruv** + - **safari**: safaribooksonline.com online video + - **safari:api** + - **safari:course**: safaribooksonline.com online courses + - **SAKTV** + - **SaltTV** + - **Sapo**: SAPO Vídeos + - **savefrom.net** + - **SBS**: sbs.com.au + - **schooltv** + - **screen.yahoo:search**: Yahoo screen search + - **Screencast** + - **ScreencastOMatic** + - **scrippsnetworks:watch** + - **Seeker** + - **SenateISVP** + - **SendtoNews** + - **ServingSys** + - **Servus** + - **Sexu** + - **SeznamZpravy** + - **SeznamZpravyArticle** + - **Shahid** + - **ShahidShow** + - **Shared**: shared.sx + - **ShowRoomLive** + - **Sina** + - **SkylineWebcams** + - **SkyNews** + - **skynewsarabia:article** + - **skynewsarabia:video** + - **SkySports** + - **Slideshare** + - **SlidesLive** + - **Slutload** + - **smotri**: Smotri.com + - **smotri:broadcast**: Smotri.com broadcasts + - **smotri:community**: Smotri.com community videos + - **smotri:user**: Smotri.com user videos + - **Snotr** + - **Sohu** + - **SonyLIV** + - **soundcloud** + - **soundcloud:playlist** + - **soundcloud:search**: Soundcloud search + - **soundcloud:set** + - **soundcloud:trackstation** + - **soundcloud:user** + - **soundgasm** + - **soundgasm:profile** + - **southpark.cc.com** + - **southpark.cc.com:español** + - **southpark.de** + - **southpark.nl** + - **southparkstudios.dk** + - **SpankBang** + - **SpankBangPlaylist** + - **Spankwire** + - **Spiegel** + - **Spiegel:Article**: Articles on spiegel.de + - **Spiegeltv** + - **sport.francetvinfo.fr** + - **Sport5** + - **SportBox** + - **SportDeutschland** + - **SpringboardPlatform** + - **Sprout** + - **sr:mediathek**: Saarländischer Rundfunk + - **SRGSSR** + - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites + - **stanfordoc**: Stanford Open ClassRoom + - **Steam** + - **Stitcher** + - **Streamable** + - **Streamango** + - **streamcloud.eu** + - **StreamCZ** + - **StreetVoice** + - **StretchInternet** + - **stv:player** + - **SunPorno** + - **sverigesradio:episode** + - **sverigesradio:publication** + - **SVT** + - **SVTPage** + - **SVTPlay**: SVT Play and Öppet arkiv + - **SVTSeries** + - **SWRMediathek** + - **Syfy** + - **SztvHu** + - **t-online.de** + - **Tagesschau** + - **tagesschau:player** + - **Tass** + - **TastyTrade** + - **TBS** + - **TDSLifeway** + - **Teachable** + - **TeachableCourse** + - **teachertube**: teachertube.com videos + - 
**teachertube:user:collection**: teachertube.com user and collection videos + - **TeachingChannel** + - **Teamcoco** + - **TeamTreeHouse** + - **TechTalks** + - **techtv.mit.edu** + - **ted** + - **Tele13** + - **Tele5** + - **TeleBruxelles** + - **Telecinco**: telecinco.es, cuatro.com and mediaset.es + - **Telegraaf** + - **TeleMB** + - **TeleQuebec** + - **TeleQuebecEmission** + - **TeleQuebecLive** + - **TeleTask** + - **Telewebion** + - **TennisTV** + - **TF1** + - **TFO** + - **TheIntercept** + - **theoperaplatform** + - **ThePlatform** + - **ThePlatformFeed** + - **TheScene** + - **TheStar** + - **TheSun** + - **TheWeatherChannel** + - **ThisAmericanLife** + - **ThisAV** + - **ThisOldHouse** + - **TikTok** + - **TikTokUser** + - **tinypic**: tinypic.com videos + - **TMZ** + - **TMZArticle** + - **TNAFlix** + - **TNAFlixNetworkEmbed** + - **toggle** + - **ToonGoggles** + - **Tosh**: Tosh.0 + - **tou.tv** + - **Toypics**: Toypics video + - **ToypicsUser**: Toypics user profile + - **TrailerAddict** (Currently broken) + - **Trilulilu** + - **TruNews** + - **TruTV** + - **Tube8** + - **TubiTv** + - **Tumblr** + - **tunein:clip** + - **tunein:program** + - **tunein:station** + - **tunein:topic** + - **TunePk** + - **Turbo** + - **Tutv** + - **tv.dfb.de** + - **TV2** + - **tv2.hu** + - **TV2Article** + - **TV4**: tv4.se and tv4play.se + - **TV5MondePlus**: TV5MONDE+ + - **TVA** + - **TVANouvelles** + - **TVANouvellesArticle** + - **TVC** + - **TVCArticle** + - **tvigle**: Интернет-телевидение Tvigle.ru + - **tvland.com** + - **TVN24** + - **TVNet** + - **TVNoe** + - **TVNow** + - **TVNowAnnual** + - **TVNowNew** + - **TVNowSeason** + - **TVNowShow** + - **tvp**: Telewizja Polska + - **tvp:embed**: Telewizja Polska + - **tvp:series** + - **TVPlayer** + - **TVPlayHome** + - **Tweakers** + - **TwitCasting** + - **twitch:chapter** + - **twitch:clips** + - **twitch:profile** + - **twitch:stream** + - **twitch:video** + - **twitch:videos:all** + - **twitch:videos:highlights** + - **twitch:videos:past-broadcasts** + - **twitch:videos:uploads** + - **twitch:vod** + - **twitter** + - **twitter:amplify** + - **twitter:card** + - **udemy** + - **udemy:course** + - **UDNEmbed**: 聯合影音 + - **UFCTV** + - **UKTVPlay** + - **umg:de**: Universal Music Deutschland + - **Unistra** + - **Unity** + - **uol.com.br** + - **uplynk** + - **uplynk:preplay** + - **Urort**: NRK P3 Urørt + - **URPlay** + - **USANetwork** + - **USAToday** + - **ustream** + - **ustream:channel** + - **ustudio** + - **ustudio:embed** + - **Varzesh3** + - **Vbox7** + - **VeeHD** + - **Veoh** + - **verystream** + - **Vessel** + - **Vesti**: Вести.Ru + - **Vevo** + - **VevoPlaylist** + - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet + - **vh1.com** + - **vhx:embed** + - **Viafree** + - **vice** + - **vice:article** + - **vice:show** + - **Vidbit** + - **Viddler** + - **Videa** + - **video.google:search**: Google Video search + - **video.mit.edu** + - **VideoDetective** + - **videofy.me** + - **videomore** + - **videomore:season** + - **videomore:video** + - **VideoPremium** + - **VideoPress** + - **Vidio** + - **VidLii** + - **vidme** + - **vidme:user** + - **vidme:user:likes** + - **Vidzi** + - **vier**: vier.be and vijf.be + - **vier:videos** + - **ViewLift** + - **ViewLiftEmbed** + - **Viewster** + - **Viidea** + - **viki** + - **viki:channel** + - **vimeo** + - **vimeo:album** + - **vimeo:channel** + - **vimeo:group** + - **vimeo:likes**: Vimeo user likes + - **vimeo:ondemand** + - **vimeo:review**: Review pages on vimeo + - 
**vimeo:user** + - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication) + - **Vimple**: Vimple - one-click video hosting + - **Vine** + - **vine:user** + - **Viqeo** + - **Viu** + - **viu:ott** + - **viu:playlist** + - **Vivo**: vivo.sx + - **vk**: VK + - **vk:uservideos**: VK - User's Videos + - **vk:wallpost** + - **vlive** + - **vlive:channel** + - **vlive:playlist** + - **Vodlocker** + - **VODPl** + - **VODPlatform** + - **VoiceRepublic** + - **Voot** + - **VoxMedia** + - **VoxMediaVolume** + - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **Vrak** + - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza + - **VrtNU**: VrtNU.be + - **vrv** + - **vrv:series** + - **VShare** + - **VTXTV** + - **vube**: Vube.com + - **VuClip** + - **VVVVID** + - **VyboryMos** + - **Vzaar** + - **Wakanim** + - **Walla** + - **WalyTV** + - **washingtonpost** + - **washingtonpost:article** + - **wat.tv** + - **WatchBox** + - **WatchIndianPorn**: Watch Indian Porn + - **WDR** + - **wdr:mobile** + - **WDRElefant** + - **WDRPage** + - **Webcaster** + - **WebcasterFeed** + - **WebOfStories** + - **WebOfStoriesPlaylist** + - **Weibo** + - **WeiboMobile** + - **WeiqiTV**: WQTV + - **Wimp** + - **Wistia** + - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl + - **WorldStarHipHop** + - **WSJ**: Wall Street Journal + - **WSJArticle** + - **WWE** + - **XBef** + - **XboxClips** + - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me + - **XHamster** + - **XHamsterEmbed** + - **xiami:album**: 虾米音乐 - 专辑 + - **xiami:artist**: 虾米音乐 - 歌手 + - **xiami:collection**: 虾米音乐 - 精选集 + - **xiami:song**: 虾米音乐 + - **ximalaya**: 喜马拉雅FM + - **ximalaya:album**: 喜马拉雅FM 专辑 + - **XMinus** + - **XNXX** + - **Xstream** + - **XTube** + - **XTubeUser**: XTube user profile + - **Xuite**: 隨意窩Xuite影音 + - **XVideos** + - **XXXYMovies** + - **Yahoo**: Yahoo screen and movies + - **yahoo:gyao** + - **yahoo:gyao:player** + - **YandexDisk** + - **yandexmusic:album**: Яндекс.Музыка - Альбом + - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист + - **yandexmusic:track**: Яндекс.Музыка - Трек + - **YandexVideo** + - **YapFiles** + - **YesJapan** + - **yinyuetai:video**: 音悦Tai + - **Ynet** + - **YouJizz** + - **youku**: 优酷 + - **youku:show** + - **YouNowChannel** + - **YouNowLive** + - **YouNowMoment** + - **YouPorn** + - **YourPorn** + - **YourUpload** + - **youtube**: YouTube.com + - **youtube:channel**: YouTube.com channels + - **youtube:favorites**: YouTube.com favourite videos, ":ytfav" for short (requires authentication) + - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication) + - **youtube:live**: YouTube.com live streams + - **youtube:playlist**: YouTube.com playlists + - **youtube:playlists**: YouTube.com user/channel playlists + - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication) + - **youtube:search**: YouTube.com searches + - **youtube:search:date**: YouTube.com searches, newest videos first + - **youtube:search_url**: YouTube.com search URLs + - **youtube:show**: YouTube.com (multi-season) shows + - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) + - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) + - **youtube:watchlater**: Youtube watch later list, 
":ytwatchlater" for short (requires authentication) + - **Zapiks** + - **Zaq1** + - **Zattoo** + - **ZattooLive** + - **ZDF** + - **ZDFChannel** + - **zingmp3**: mp3.zing.vn + - **Zype** diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..da78a9c47 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[wheel] +universal = True + +[flake8] +exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv +ignore = E402,E501,E731,E741,W503 diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..af68b485e --- /dev/null +++ b/setup.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import print_function + +import os.path +import warnings +import sys + +try: + from setuptools import setup, Command + setuptools_available = True +except ImportError: + from distutils.core import setup, Command + setuptools_available = False +from distutils.spawn import spawn + +try: + # This will create an exe that needs Microsoft Visual C++ 2008 + # Redistributable Package + import py2exe +except ImportError: + if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': + print('Cannot import py2exe', file=sys.stderr) + exit(1) + +py2exe_options = { + 'bundle_files': 1, + 'compressed': 1, + 'optimize': 2, + 'dist_dir': '.', + 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], +} + +# Get the version from youtube_dl/version.py without importing the package +exec(compile(open('youtube_dl/version.py').read(), + 'youtube_dl/version.py', 'exec')) + +DESCRIPTION = 'YouTube video downloader' +LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites' + +py2exe_console = [{ + 'script': './youtube_dl/__main__.py', + 'dest_base': 'youtube-dl', + 'version': __version__, + 'description': DESCRIPTION, + 'comments': LONG_DESCRIPTION, + 'product_name': 'youtube-dl', + 'product_version': __version__, +}] + +py2exe_params = { + 'console': py2exe_console, + 'options': {'py2exe': py2exe_options}, + 'zipfile': None +} + +if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': + params = py2exe_params +else: + files_spec = [ + ('etc/bash_completion.d', ['youtube-dl.bash-completion']), + ('etc/fish/completions', ['youtube-dl.fish']), + ('share/doc/youtube_dl', ['README.txt']), + ('share/man/man1', ['youtube-dl.1']) + ] + root = os.path.dirname(os.path.abspath(__file__)) + data_files = [] + for dirname, files in files_spec: + resfiles = [] + for fn in files: + if not os.path.exists(fn): + warnings.warn('Skipping file %s since it is not present. Type make to build all automatically generated files.' 
% fn) + else: + resfiles.append(fn) + data_files.append((dirname, resfiles)) + + params = { + 'data_files': data_files, + } + if setuptools_available: + params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']} + else: + params['scripts'] = ['bin/youtube-dl'] + +class build_lazy_extractors(Command): + description = 'Build the extractor lazy loading module' + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + spawn( + [sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'], + dry_run=self.dry_run, + ) + +setup( + name='youtube_dl', + version=__version__, + description=DESCRIPTION, + long_description=LONG_DESCRIPTION, + url='https://github.com/ytdl-org/youtube-dl', + author='Ricardo Garcia', + author_email='ytdl@yt-dl.org', + maintainer='Sergey M.', + maintainer_email='dstftw@gmail.com', + license='Unlicense', + packages=[ + 'youtube_dl', + 'youtube_dl.extractor', 'youtube_dl.downloader', + 'youtube_dl.postprocessor'], + + # Provokes warning on most systems (why?!) + # test_suite = 'nose.collector', + # test_requires = ['nosetest'], + + classifiers=[ + 'Topic :: Multimedia :: Video', + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', + 'License :: Public Domain', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: Implementation', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: IronPython', + 'Programming Language :: Python :: Implementation :: Jython', + 'Programming Language :: Python :: Implementation :: PyPy', + ], + + cmdclass={'build_lazy_extractors': build_lazy_extractors}, + **params +) diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/helper.py b/test/helper.py new file mode 100644 index 000000000..e62aab11e --- /dev/null +++ b/test/helper.py @@ -0,0 +1,282 @@ +from __future__ import unicode_literals + +import errno +import io +import hashlib +import json +import os.path +import re +import types +import ssl +import sys + +import youtube_dl.extractor +from youtube_dl import YoutubeDL +from youtube_dl.compat import ( + compat_os_name, + compat_str, +) +from youtube_dl.utils import ( + preferredencoding, + write_string, +) + + +def get_params(override=None): + PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "parameters.json") + LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), + "local_parameters.json") + with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: + parameters = json.load(pf) + if os.path.exists(LOCAL_PARAMETERS_FILE): + with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf: + parameters.update(json.load(pf)) + if override: + parameters.update(override) + return parameters + + +def try_rm(filename): + """ Remove a file if it exists """ + try: + os.remove(filename) + except OSError as ose: + if ose.errno != errno.ENOENT: + raise + + +def 
report_warning(message): + ''' + Print the message to stderr, it will be prefixed with 'WARNING:' + If stderr is a tty file the 'WARNING:' will be colored + ''' + if sys.stderr.isatty() and compat_os_name != 'nt': + _msg_header = '\033[0;33mWARNING:\033[0m' + else: + _msg_header = 'WARNING:' + output = '%s %s\n' % (_msg_header, message) + if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3: + output = output.encode(preferredencoding()) + sys.stderr.write(output) + + +class FakeYDL(YoutubeDL): + def __init__(self, override=None): + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. + params = get_params(override=override) + super(FakeYDL, self).__init__(params, auto_init=False) + self.result = [] + + def to_screen(self, s, skip_eol=None): + print(s) + + def trouble(self, s, tb=None): + raise Exception(s) + + def download(self, x): + self.result.append(x) + + def expect_warning(self, regex): + # Silence an expected warning matching a regex + old_report_warning = self.report_warning + + def report_warning(self, message): + if re.match(regex, message): + return + old_report_warning(message) + self.report_warning = types.MethodType(report_warning, self) + + +def gettestcases(include_onlymatching=False): + for ie in youtube_dl.extractor.gen_extractors(): + for tc in ie.get_testcases(include_onlymatching): + yield tc + + +md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() + + +def expect_value(self, got, expected, field): + if isinstance(expected, compat_str) and expected.startswith('re:'): + match_str = expected[len('re:'):] + match_rex = re.compile(match_str) + + self.assertTrue( + isinstance(got, compat_str), + 'Expected a %s object, but got %s for field %s' % ( + compat_str.__name__, type(got).__name__, field)) + self.assertTrue( + match_rex.match(got), + 'field %s (value: %r) should match %r' % (field, got, match_str)) + elif isinstance(expected, compat_str) and expected.startswith('startswith:'): + start_str = expected[len('startswith:'):] + self.assertTrue( + isinstance(got, compat_str), + 'Expected a %s object, but got %s for field %s' % ( + compat_str.__name__, type(got).__name__, field)) + self.assertTrue( + got.startswith(start_str), + 'field %s (value: %r) should start with %r' % (field, got, start_str)) + elif isinstance(expected, compat_str) and expected.startswith('contains:'): + contains_str = expected[len('contains:'):] + self.assertTrue( + isinstance(got, compat_str), + 'Expected a %s object, but got %s for field %s' % ( + compat_str.__name__, type(got).__name__, field)) + self.assertTrue( + contains_str in got, + 'field %s (value: %r) should contain %r' % (field, got, contains_str)) + elif isinstance(expected, type): + self.assertTrue( + isinstance(got, expected), + 'Expected type %r for field %s, but got value %r of type %r' % (expected, field, got, type(got))) + elif isinstance(expected, dict) and isinstance(got, dict): + expect_dict(self, got, expected) + elif isinstance(expected, list) and isinstance(got, list): + self.assertEqual( + len(expected), len(got), + 'Expect a list of length %d, but got a list of length %d for field %s' % ( + len(expected), len(got), field)) + for index, (item_got, item_expected) in enumerate(zip(got, expected)): + type_got = type(item_got) + type_expected = type(item_expected) + self.assertEqual( + type_expected, type_got, + 'Type mismatch for list item at index %d for field %s, expected %r, got %r' % ( + index, field, 
type_expected, type_got)) + expect_value(self, item_got, item_expected, field) + else: + if isinstance(expected, compat_str) and expected.startswith('md5:'): + self.assertTrue( + isinstance(got, compat_str), + 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got))) + got = 'md5:' + md5(got) + elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected): + self.assertTrue( + isinstance(got, (list, dict)), + 'Expected field %s to be a list or a dict, but it is of type %s' % ( + field, type(got).__name__)) + op, _, expected_num = expected.partition(':') + expected_num = int(expected_num) + if op == 'mincount': + assert_func = assertGreaterEqual + msg_tmpl = 'Expected %d items in field %s, but only got %d' + elif op == 'maxcount': + assert_func = assertLessEqual + msg_tmpl = 'Expected maximum %d items in field %s, but got %d' + elif op == 'count': + assert_func = assertEqual + msg_tmpl = 'Expected exactly %d items in field %s, but got %d' + else: + assert False + assert_func( + self, len(got), expected_num, + msg_tmpl % (expected_num, field, len(got))) + return + self.assertEqual( + expected, got, + 'Invalid value for field %s, expected %r, got %r' % (field, expected, got)) + + +def expect_dict(self, got_dict, expected_dict): + for info_field, expected in expected_dict.items(): + got = got_dict.get(info_field) + expect_value(self, got, expected, info_field) + + +def expect_info_dict(self, got_dict, expected_dict): + expect_dict(self, got_dict, expected_dict) + # Check for the presence of mandatory fields + if got_dict.get('_type') not in ('playlist', 'multi_video'): + for key in ('id', 'url', 'title', 'ext'): + self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) + # Check for mandatory fields that are automatically set by YoutubeDL + for key in ['webpage_url', 'extractor', 'extractor_key']: + self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) + + # Are checkable fields missing from the test case definition? 
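+    # Candidate fields for a suggested test definition; long string values
+    # (250 characters or more) are folded to 'md5:<hash>' so the printed
+    # snippet below stays compact.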
+ test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) + for key, value in got_dict.items() + if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit')) + missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) + if missing_keys: + def _repr(v): + if isinstance(v, compat_str): + return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n') + else: + return repr(v) + info_dict_str = '' + if len(missing_keys) != len(expected_dict): + info_dict_str += ''.join( + ' %s: %s,\n' % (_repr(k), _repr(v)) + for k, v in test_info_dict.items() if k not in missing_keys) + + if info_dict_str: + info_dict_str += '\n' + info_dict_str += ''.join( + ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) + for k in missing_keys) + write_string( + '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) + self.assertFalse( + missing_keys, + 'Missing keys in test definition: %s' % ( + ', '.join(sorted(missing_keys)))) + + +def assertRegexpMatches(self, text, regexp, msg=None): + if hasattr(self, 'assertRegexp'): + return self.assertRegexp(text, regexp, msg) + else: + m = re.match(regexp, text) + if not m: + note = 'Regexp didn\'t match: %r not found' % (regexp) + if len(text) < 1000: + note += ' in %r' % text + if msg is None: + msg = note + else: + msg = note + ', ' + msg + self.assertTrue(m, msg) + + +def assertGreaterEqual(self, got, expected, msg=None): + if not (got >= expected): + if msg is None: + msg = '%r not greater than or equal to %r' % (got, expected) + self.assertTrue(got >= expected, msg) + + +def assertLessEqual(self, got, expected, msg=None): + if not (got <= expected): + if msg is None: + msg = '%r not less than or equal to %r' % (got, expected) + self.assertTrue(got <= expected, msg) + + +def assertEqual(self, got, expected, msg=None): + if not (got == expected): + if msg is None: + msg = '%r not equal to %r' % (got, expected) + self.assertTrue(got == expected, msg) + + +def expect_warnings(ydl, warnings_re): + real_warning = ydl.report_warning + + def _report_warning(w): + if not any(re.search(w_re, w) for w_re in warnings_re): + real_warning(w) + + ydl.report_warning = _report_warning + + +def http_server_port(httpd): + if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket): + # In Jython SSLSocket is not a subclass of socket.socket + sock = httpd.socket.sock + else: + sock = httpd.socket + return sock.getsockname()[1] diff --git a/test/parameters.json b/test/parameters.json new file mode 100644 index 000000000..7bf59c25f --- /dev/null +++ b/test/parameters.json @@ -0,0 +1,43 @@ +{ + "consoletitle": false, + "continuedl": true, + "forcedescription": false, + "forcefilename": false, + "forceformat": false, + "forcethumbnail": false, + "forcetitle": false, + "forceurl": false, + "format": "best", + "ignoreerrors": false, + "listformats": null, + "logtostderr": false, + "matchtitle": null, + "max_downloads": null, + "nooverwrites": false, + "nopart": false, + "noprogress": false, + "outtmpl": "%(id)s.%(ext)s", + "password": null, + "playlistend": -1, + "playliststart": 1, + "prefer_free_formats": false, + "quiet": false, + "ratelimit": null, + "rejecttitle": null, + "retries": 10, + "simulate": false, + "subtitleslang": null, + "subtitlesformat": "best", + "test": true, + "updatetime": true, + "usenetrc": false, + "username": null, + "verbose": true, + "writedescription": false, + "writeinfojson": true, + 
"writesubtitles": false, + "allsubtitles": false, + "listssubtitles": false, + "socket_timeout": 20, + "fixup": "never" +} diff --git a/test/swftests/.gitignore b/test/swftests/.gitignore new file mode 100644 index 000000000..da97ff7ca --- /dev/null +++ b/test/swftests/.gitignore @@ -0,0 +1 @@ +*.swf diff --git a/test/swftests/ArrayAccess.as b/test/swftests/ArrayAccess.as new file mode 100644 index 000000000..e22caa386 --- /dev/null +++ b/test/swftests/ArrayAccess.as @@ -0,0 +1,19 @@ +// input: [["a", "b", "c", "d"]] +// output: ["c", "b", "a", "d"] + +package { +public class ArrayAccess { + public static function main(ar:Array):Array { + var aa:ArrayAccess = new ArrayAccess(); + return aa.f(ar, 2); + } + + private function f(ar:Array, num:Number):Array{ + var x:String = ar[0]; + var y:String = ar[num % ar.length]; + ar[0] = y; + ar[num] = x; + return ar; + } +} +} diff --git a/test/swftests/ClassCall.as b/test/swftests/ClassCall.as new file mode 100644 index 000000000..aef58daf3 --- /dev/null +++ b/test/swftests/ClassCall.as @@ -0,0 +1,17 @@ +// input: [] +// output: 121 + +package { +public class ClassCall { + public static function main():int{ + var f:OtherClass = new OtherClass(); + return f.func(100,20); + } +} +} + +class OtherClass { + public function func(x: int, y: int):int { + return x+y+1; + } +} diff --git a/test/swftests/ClassConstruction.as b/test/swftests/ClassConstruction.as new file mode 100644 index 000000000..436479f8f --- /dev/null +++ b/test/swftests/ClassConstruction.as @@ -0,0 +1,15 @@ +// input: [] +// output: 0 + +package { +public class ClassConstruction { + public static function main():int{ + var f:Foo = new Foo(); + return 0; + } +} +} + +class Foo { + +} diff --git a/test/swftests/ConstArrayAccess.as b/test/swftests/ConstArrayAccess.as new file mode 100644 index 000000000..07dc3f460 --- /dev/null +++ b/test/swftests/ConstArrayAccess.as @@ -0,0 +1,18 @@ +// input: [] +// output: 4 + +package { +public class ConstArrayAccess { + private static const x:int = 2; + private static const ar:Array = ["42", "3411"]; + + public static function main():int{ + var c:ConstArrayAccess = new ConstArrayAccess(); + return c.f(); + } + + public function f(): int { + return ar[1].length; + } +} +} diff --git a/test/swftests/ConstantInt.as b/test/swftests/ConstantInt.as new file mode 100644 index 000000000..e0bbb6166 --- /dev/null +++ b/test/swftests/ConstantInt.as @@ -0,0 +1,12 @@ +// input: [] +// output: 2 + +package { +public class ConstantInt { + private static const x:int = 2; + + public static function main():int{ + return x; + } +} +} diff --git a/test/swftests/DictCall.as b/test/swftests/DictCall.as new file mode 100644 index 000000000..c2d174cc2 --- /dev/null +++ b/test/swftests/DictCall.as @@ -0,0 +1,10 @@ +// input: [{"x": 1, "y": 2}] +// output: 3 + +package { +public class DictCall { + public static function main(d:Object):int{ + return d.x + d.y; + } +} +} diff --git a/test/swftests/EqualsOperator.as b/test/swftests/EqualsOperator.as new file mode 100644 index 000000000..837a69a46 --- /dev/null +++ b/test/swftests/EqualsOperator.as @@ -0,0 +1,10 @@ +// input: [] +// output: false + +package { +public class EqualsOperator { + public static function main():Boolean{ + return 1 == 2; + } +} +} diff --git a/test/swftests/LocalVars.as b/test/swftests/LocalVars.as new file mode 100644 index 000000000..b2911a9f3 --- /dev/null +++ b/test/swftests/LocalVars.as @@ -0,0 +1,13 @@ +// input: [1, 2] +// output: 3 + +package { +public class LocalVars { + public static function 
main(a:int, b:int):int{ + var c:int = a + b + b; + var d:int = c - b; + var e:int = d; + return e; + } +} +} diff --git a/test/swftests/MemberAssignment.as b/test/swftests/MemberAssignment.as new file mode 100644 index 000000000..dcba5e3ff --- /dev/null +++ b/test/swftests/MemberAssignment.as @@ -0,0 +1,22 @@ +// input: [1] +// output: 2 + +package { +public class MemberAssignment { + public var v:int; + + public function g():int { + return this.v; + } + + public function f(a:int):int{ + this.v = a; + return this.v + this.g(); + } + + public static function main(a:int): int { + var v:MemberAssignment = new MemberAssignment(); + return v.f(a); + } +} +} diff --git a/test/swftests/NeOperator.as b/test/swftests/NeOperator.as new file mode 100644 index 000000000..61dcbc4e9 --- /dev/null +++ b/test/swftests/NeOperator.as @@ -0,0 +1,24 @@ +// input: [] +// output: 123 + +package { +public class NeOperator { + public static function main(): int { + var res:int = 0; + if (1 != 2) { + res += 3; + } else { + res += 4; + } + if (2 != 2) { + res += 10; + } else { + res += 20; + } + if (9 == 9) { + res += 100; + } + return res; + } +} +} diff --git a/test/swftests/PrivateCall.as b/test/swftests/PrivateCall.as new file mode 100644 index 000000000..f1c110a37 --- /dev/null +++ b/test/swftests/PrivateCall.as @@ -0,0 +1,21 @@ +// input: [] +// output: 9 + +package { +public class PrivateCall { + public static function main():int{ + var f:OtherClass = new OtherClass(); + return f.func(); + } +} +} + +class OtherClass { + private function pf():int { + return 9; + } + + public function func():int { + return this.pf(); + } +} diff --git a/test/swftests/PrivateVoidCall.as b/test/swftests/PrivateVoidCall.as new file mode 100644 index 000000000..2cc016797 --- /dev/null +++ b/test/swftests/PrivateVoidCall.as @@ -0,0 +1,22 @@ +// input: [] +// output: 9 + +package { +public class PrivateVoidCall { + public static function main():int{ + var f:OtherClass = new OtherClass(); + f.func(); + return 9; + } +} +} + +class OtherClass { + private function pf():void { + ; + } + + public function func():void { + this.pf(); + } +} diff --git a/test/swftests/StaticAssignment.as b/test/swftests/StaticAssignment.as new file mode 100644 index 000000000..b061c219d --- /dev/null +++ b/test/swftests/StaticAssignment.as @@ -0,0 +1,13 @@ +// input: [1] +// output: 1 + +package { +public class StaticAssignment { + public static var v:int; + + public static function main(a:int):int{ + v = a; + return v; + } +} +} diff --git a/test/swftests/StaticRetrieval.as b/test/swftests/StaticRetrieval.as new file mode 100644 index 000000000..c8352d819 --- /dev/null +++ b/test/swftests/StaticRetrieval.as @@ -0,0 +1,16 @@ +// input: [] +// output: 1 + +package { +public class StaticRetrieval { + public static var v:int; + + public static function main():int{ + if (v) { + return 0; + } else { + return 1; + } + } +} +} diff --git a/test/swftests/StringBasics.as b/test/swftests/StringBasics.as new file mode 100644 index 000000000..d27430b13 --- /dev/null +++ b/test/swftests/StringBasics.as @@ -0,0 +1,11 @@ +// input: [] +// output: 3 + +package { +public class StringBasics { + public static function main():int{ + var s:String = "abc"; + return s.length; + } +} +} diff --git a/test/swftests/StringCharCodeAt.as b/test/swftests/StringCharCodeAt.as new file mode 100644 index 000000000..c20d74d65 --- /dev/null +++ b/test/swftests/StringCharCodeAt.as @@ -0,0 +1,11 @@ +// input: [] +// output: 9897 + +package { +public class StringCharCodeAt { + public static 
function main():int{ + var s:String = "abc"; + return s.charCodeAt(1) * 100 + s.charCodeAt(); + } +} +} diff --git a/test/swftests/StringConversion.as b/test/swftests/StringConversion.as new file mode 100644 index 000000000..c976f5042 --- /dev/null +++ b/test/swftests/StringConversion.as @@ -0,0 +1,11 @@ +// input: [] +// output: 2 + +package { +public class StringConversion { + public static function main():int{ + var s:String = String(99); + return s.length; + } +} +} diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py new file mode 100644 index 000000000..71f6608fe --- /dev/null +++ b/test/test_InfoExtractor.py @@ -0,0 +1,1071 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import io +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import FakeYDL, expect_dict, expect_value, http_server_port +from youtube_dl.compat import compat_etree_fromstring, compat_http_server +from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.extractor import YoutubeIE, get_info_extractor +from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError +import threading + + +TEAPOT_RESPONSE_STATUS = 418 +TEAPOT_RESPONSE_BODY = "

<h1>418 I'm a teapot</h1>
    " + + +class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/teapot': + self.send_response(TEAPOT_RESPONSE_STATUS) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + else: + assert False + + +class TestIE(InfoExtractor): + pass + + +class TestInfoExtractor(unittest.TestCase): + def setUp(self): + self.ie = TestIE(FakeYDL()) + + def test_ie_key(self): + self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) + + def test_html_search_regex(self): + html = '

<p id="foo">Watch this video</p>
    ' + search = lambda re, *args: self.ie._html_search_regex(re, html, *args) + self.assertEqual(search(r'

<p id="foo">(.+?)</p>
    ', 'foo'), 'Watch this video') + + def test_opengraph(self): + ie = self.ie + html = ''' + + + + + + + + + ''' + self.assertEqual(ie._og_search_title(html), 'Foo') + self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') + self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2') + self.assertEqual(ie._og_search_video_url(html, default=None), None) + self.assertEqual(ie._og_search_property('foobar', html), 'Foo') + self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar') + self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar') + self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph') + self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') + self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) + self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) + + def test_html_search_meta(self): + ie = self.ie + html = ''' + + + + + + + ''' + + self.assertEqual(ie._html_search_meta('a', html), '1') + self.assertEqual(ie._html_search_meta('b', html), '2') + self.assertEqual(ie._html_search_meta('c', html), '3') + self.assertEqual(ie._html_search_meta('d', html), '4') + self.assertEqual(ie._html_search_meta('e', html), '5') + self.assertEqual(ie._html_search_meta('f', html), '6') + self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1') + self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3') + self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3') + self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) + self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) + + def test_download_json(self): + uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') + self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) + uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript') + self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'}) + uri = encode_data_uri(b'{"foo": invalid}', 'application/json') + self.assertRaises(ExtractorError, self.ie._download_json, uri, None) + self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) + + def test_parse_html5_media_entries(self): + # from https://www.r18.com/ + # with kpbs in label + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.r18.com/', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4', + 'ext': 'mp4', + 'format_id': '300kbps', + 'height': 240, + 'tbr': 300, + }, { + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4', + 'ext': 'mp4', + 'format_id': '1000kbps', + 'height': 480, + 'tbr': 1000, + }, { + 'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4', + 'ext': 'mp4', + 'format_id': '1500kbps', + 'height': 740, + 'tbr': 1500, + }], + 'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg' + }) + + # from https://www.csfd.cz/ + # with width and height + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.csfd.cz/', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4', + 'ext': 'mp4', + 'width': 640, + 'height': 360, + }, { + 'url': 
'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4', + 'ext': 'mp4', + 'width': 1280, + 'height': 720, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4', + 'ext': 'mp4', + 'width': 1920, + 'height': 1080, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm', + 'ext': 'webm', + 'width': 640, + 'height': 360, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm', + 'ext': 'webm', + 'width': 1280, + 'height': 720, + }, { + 'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm', + 'ext': 'webm', + 'width': 1920, + 'height': 1080, + }], + 'subtitles': { + 'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}] + }, + 'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360' + }) + + # from https://tamasha.com/v/Kkdjw + # with height in label + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://tamasha.com/v/Kkdjw', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4', + }, { + 'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4', + 'ext': 'mp4', + 'format_id': '240p', + 'height': 240, + }, { + 'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4', + 'ext': 'mp4', + 'format_id': '144p', + 'height': 144, + }] + }) + + # from https://www.directvnow.com + # with data-src + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.directvnow.com', + r''' + + ''', None)[0], + { + 'formats': [{ + 'ext': 'mp4', + 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', + }] + }) + + # from https://www.directvnow.com + # with data-src + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.directvnow.com', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4', + 'ext': 'mp4', + }] + }) + + # from https://www.klarna.com/uk/ + # with data-video-src + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://www.directvnow.com', + r''' + + ''', None)[0], + { + 'formats': [{ + 'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4', + 'ext': 'mp4', + }], + }) + + def test_extract_jwplayer_data_realworld(self): + # from http://www.suffolk.edu/sjc/ + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', None, require_title=False), + { + 'id': 'XEgvuql4', + 'formats': [{ + 'url': 'rtmp://192.138.214.154/live/sjclive', + 'ext': 'flv' + }] + }) + + # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', 'dummy', require_title=False), + { + 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', + 'formats': [{ + 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', + 'ext': 'flv' + }] + }) + + # from 
http://www.indiedb.com/games/king-machine/videos + expect_dict( + self, + self.ie._extract_jwplayer_data(r''' + + ''', 'dummy'), + { + 'title': 'king machine trailer 1', + 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg', + 'formats': [{ + 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', + 'height': 360, + 'ext': 'mp4' + }, { + 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', + 'height': 720, + 'ext': 'mp4' + }] + }) + + def test_parse_m3u8_formats(self): + _TEST_CASES = [ + ( + # https://github.com/ytdl-org/youtube-dl/issues/11507 + # http://pluzz.francetv.fr/videos/le_ministere.html + 'pluzz_francetv_11507', + 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', + [{ + 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_0_av.m3u8?null=0', + 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', + 'ext': 'mp4', + 'format_id': '180', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.66.30', + 'tbr': 180, + 'width': 256, + 'height': 144, + }, { + 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_1_av.m3u8?null=0', + 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', + 'ext': 'mp4', + 'format_id': '303', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.66.30', + 'tbr': 303, + 'width': 320, + 'height': 180, + }, { + 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_2_av.m3u8?null=0', + 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', + 'ext': 'mp4', + 'format_id': '575', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.66.30', + 'tbr': 575, + 'width': 512, + 'height': 288, + }, { + 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_3_av.m3u8?null=0', + 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', 
+ 'ext': 'mp4', + 'format_id': '831', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.77.30', + 'tbr': 831, + 'width': 704, + 'height': 396, + }, { + 'url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/index_4_av.m3u8?null=0', + 'manifest_url': 'http://replayftv-vh.akamaihd.net/i/streaming-adaptatif_france-dom-tom/2017/S16/J2/156589847-58f59130c1f52-,standard1,standard2,standard3,standard4,standard5,.mp4.csmil/master.m3u8?caption=2017%2F16%2F156589847-1492488987.m3u8%3Afra%3AFrancais&audiotrack=0%3Afra%3AFrancais', + 'ext': 'mp4', + 'protocol': 'm3u8', + 'format_id': '1467', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.77.30', + 'tbr': 1467, + 'width': 1024, + 'height': 576, + }] + ), + ( + # https://github.com/ytdl-org/youtube-dl/issues/11995 + # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor + 'teamcoco_11995', + 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', + [{ + 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-160k_v4.m3u8', + 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', + 'ext': 'mp4', + 'format_id': 'audio-0-Default', + 'protocol': 'm3u8', + 'vcodec': 'none', + }, { + 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8', + 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', + 'ext': 'mp4', + 'format_id': 'audio-1-Default', + 'protocol': 'm3u8', + 'vcodec': 'none', + }, { + 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-audio-64k_v4.m3u8', + 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', + 'ext': 'mp4', + 'format_id': '71', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.5', + 'vcodec': 'none', + 'tbr': 71, + }, { + 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', + 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', + 'ext': 'mp4', + 'format_id': '413', + 'protocol': 'm3u8', + 'acodec': 'none', + 'vcodec': 'avc1.42001e', + 'tbr': 413, + 'width': 400, + 'height': 224, + }, { + 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-400k_v4.m3u8', + 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', + 'ext': 'mp4', + 'format_id': '522', + 'protocol': 'm3u8', + 'acodec': 'none', + 'vcodec': 'avc1.42001e', + 'tbr': 522, + 'width': 400, + 'height': 224, + }, { + 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-1m_v4.m3u8', + 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', + 'ext': 'mp4', + 'format_id': '1205', + 'protocol': 'm3u8', + 'acodec': 'none', + 'vcodec': 'avc1.4d001e', + 'tbr': 1205, + 'width': 640, + 'height': 360, + }, { + 'url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/hls/CONAN_020217_Highlight_show-2m_v4.m3u8', + 'manifest_url': 'http://ak.storage-w.teamcococdn.com/cdn/2017-02/98599/ed8f/main.m3u8', + 'ext': 'mp4', + 'format_id': '2374', + 'protocol': 'm3u8', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 2374, + 'width': 1024, + 'height': 576, + }] + ), + ( + # https://github.com/ytdl-org/youtube-dl/issues/12211 + # 
http://video.toggle.sg/en/series/whoopie-s-world/ep3/478601 + 'toggle_mobile_12211', + 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', + [{ + 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_sa2ntrdg/name/a.mp4/index.m3u8', + 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', + 'ext': 'mp4', + 'format_id': 'audio-English', + 'protocol': 'm3u8', + 'language': 'eng', + 'vcodec': 'none', + }, { + 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_r7y0nitg/name/a.mp4/index.m3u8', + 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', + 'ext': 'mp4', + 'format_id': 'audio-Undefined', + 'protocol': 'm3u8', + 'language': 'und', + 'vcodec': 'none', + }, { + 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_qlk9hlzr/name/a.mp4/index.m3u8', + 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', + 'ext': 'mp4', + 'format_id': '155', + 'protocol': 'm3u8', + 'tbr': 155.648, + 'width': 320, + 'height': 180, + }, { + 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/2/pv/1/flavorId/0_oefackmi/name/a.mp4/index.m3u8', + 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', + 'ext': 'mp4', + 'format_id': '502', + 'protocol': 'm3u8', + 'tbr': 502.784, + 'width': 480, + 'height': 270, + }, { + 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_vyg9pj7k/name/a.mp4/index.m3u8', + 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', + 'ext': 'mp4', + 'format_id': '827', + 'protocol': 'm3u8', + 'tbr': 827.392, + 'width': 640, + 'height': 360, + }, { + 'url': 'http://k.toggle.sg/fhls/p/2082311/sp/208231100/serveFlavor/entryId/0_89q6e8ku/v/12/pv/1/flavorId/0_50n4psvx/name/a.mp4/index.m3u8', + 'manifest_url': 'http://cdnapi.kaltura.com/p/2082311/sp/208231100/playManifest/protocol/http/entryId/0_89q6e8ku/format/applehttp/tags/mobile_sd/f/a.m3u8', + 'ext': 'mp4', + 'format_id': '1396', + 'protocol': 'm3u8', + 'tbr': 1396.736, + 'width': 854, + 'height': 480, + }] + ), + ( + # http://www.twitch.tv/riotgames/v/6528877 + 'twitch_vod', + 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', + [{ + 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/audio_only/index-muted-HM49I092CC.m3u8', + 'manifest_url': 
'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', + 'ext': 'mp4', + 'format_id': 'Audio Only', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 182.725, + }, { + 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/mobile/index-muted-HM49I092CC.m3u8', + 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', + 'ext': 'mp4', + 'format_id': 'Mobile', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.42C00D', + 'tbr': 280.474, + 'width': 400, + 'height': 226, + }, { + 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/low/index-muted-HM49I092CC.m3u8', + 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', + 'ext': 'mp4', + 'format_id': 'Low', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.42C01E', + 'tbr': 628.347, + 'width': 640, + 'height': 360, + }, { + 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/medium/index-muted-HM49I092CC.m3u8', + 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', + 'ext': 'mp4', + 'format_id': 'Medium', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.42C01E', + 'tbr': 893.387, + 'width': 852, + 'height': 480, + }, { + 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/high/index-muted-HM49I092CC.m3u8', + 'manifest_url': 'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', + 'ext': 'mp4', + 'format_id': 'High', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.42C01F', + 'tbr': 1603.789, + 'width': 1280, + 'height': 720, + }, { + 'url': 'https://vod.edgecast.hls.ttvnw.net/e5da31ab49_riotgames_15001215120_261543898/chunked/index-muted-HM49I092CC.m3u8', + 'manifest_url': 
'https://usher.ttvnw.net/vod/6528877?allow_source=true&allow_audio_only=true&allow_spectre=true&player=twitchweb&nauth=%7B%22user_id%22%3Anull%2C%22vod_id%22%3A6528877%2C%22expires%22%3A1492887874%2C%22chansub%22%3A%7B%22restricted_bitrates%22%3A%5B%5D%7D%2C%22privileged%22%3Afalse%2C%22https_required%22%3Afalse%7D&nauthsig=3e29296a6824a0f48f9e731383f77a614fc79bee', + 'ext': 'mp4', + 'format_id': 'Source', + 'protocol': 'm3u8', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc1.100.31', + 'tbr': 3214.134, + 'width': 1280, + 'height': 720, + }] + ), + ( + # http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015 + # EXT-X-STREAM-INF tag with NAME attribute that is not defined + # in HLS specification + 'vidio', + 'https://www.vidio.com/videos/165683/playlist.m3u8', + [{ + 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b300.mp4.m3u8', + 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8', + 'ext': 'mp4', + 'format_id': '270p 3G', + 'protocol': 'm3u8', + 'tbr': 300, + 'width': 480, + 'height': 270, + }, { + 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b600.mp4.m3u8', + 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8', + 'ext': 'mp4', + 'format_id': '360p SD', + 'protocol': 'm3u8', + 'tbr': 600, + 'width': 640, + 'height': 360, + }, { + 'url': 'https://cdn1-a.production.vidio.static6.com/uploads/165683/dj_ambred-4383-b1200.mp4.m3u8', + 'manifest_url': 'https://www.vidio.com/videos/165683/playlist.m3u8', + 'ext': 'mp4', + 'format_id': '720p HD', + 'protocol': 'm3u8', + 'tbr': 1200, + 'width': 1280, + 'height': 720, + }] + ), + ( + # https://github.com/ytdl-org/youtube-dl/issues/18923 + # https://www.ted.com/talks/boris_hesser_a_grassroots_healthcare_revolution_in_africa + 'ted_18923', + 'http://hls.ted.com/talks/31241.m3u8', + [{ + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '600k-Audio', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/audio/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '68', + 'vcodec': 'none', + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/64k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '163', + 'acodec': 'none', + 'width': 320, + 'height': 180, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/180k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '481', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/320k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '769', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/450k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '984', + 'acodec': 'none', + 'width': 512, + 'height': 288, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/600k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1255', + 'acodec': 'none', + 'width': 640, + 'height': 360, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/950k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '1693', + 'acodec': 'none', + 'width': 853, + 'height': 480, + }, { + 'url': 'http://hls.ted.com/videos/BorisHesser_2018S/video/1500k.m3u8?nobumpers=true&uniqueId=76011e2b', + 'format_id': '2462', + 'acodec': 'none', + 'width': 1280, + 'height': 720, + }] + ), + ] + + for m3u8_file, m3u8_url, expected_formats in _TEST_CASES: 
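+            # Each case pairs an m3u8 fixture from test/testdata/m3u8/ with
+            # the format list the parser is expected to produce, keeping the
+            # test fully offline.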
+ with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_m3u8_formats( + f.read(), m3u8_url, ext='mp4') + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + + def test_parse_mpd_formats(self): + _TEST_CASES = [ + ( + # https://github.com/ytdl-org/youtube-dl/issues/13919 + # Also tests duplicate representation ids, see + # https://github.com/ytdl-org/youtube-dl/issues/15111 + 'float_duration', + 'http://unknown/manifest.mpd', # mpd_url + None, # mpd_base_url + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'm4a', + 'format_id': '318597', + 'format_note': 'DASH audio', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 61.587, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '318597', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 318.597, + 'width': 340, + 'height': 192, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '638590', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.42001f', + 'tbr': 638.59, + 'width': 512, + 'height': 288, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '1022565', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 1022.565, + 'width': 688, + 'height': 384, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '2046506', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.4d001f', + 'tbr': 2046.506, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '3998017', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640029', + 'tbr': 3998.017, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': '5997485', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'none', + 'vcodec': 'avc1.640032', + 'tbr': 5997.485, + 'width': 1920, + 'height': 1080, + }] + ), ( + # https://github.com/ytdl-org/youtube-dl/pull/14844 + 'urls_only', + 'http://unknown/manifest.mpd', # mpd_url + None, # mpd_base_url + [{ + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_144p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 200, + 'width': 256, + 'height': 144, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_240p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 400, + 'width': 424, + 'height': 240, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_360p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 800, + 'width': 640, + 'height': 360, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_480p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 
'avc3.42c01e', + 'tbr': 1200, + 'width': 856, + 'height': 480, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_576p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 1600, + 'width': 1024, + 'height': 576, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_720p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 2400, + 'width': 1280, + 'height': 720, + }, { + 'manifest_url': 'http://unknown/manifest.mpd', + 'ext': 'mp4', + 'format_id': 'h264_aac_1080p_m4s', + 'format_note': 'DASH video', + 'protocol': 'http_dash_segments', + 'acodec': 'mp4a.40.2', + 'vcodec': 'avc3.42c01e', + 'tbr': 4400, + 'width': 1920, + 'height': 1080, + }] + ), ( + # https://github.com/ytdl-org/youtube-dl/issues/20346 + # Media considered unfragmented even though it contains + # Initialization tag + 'unfragmented', + 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url + 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url + [{ + 'url': 'https://v.redd.it/hw1x7rcg7zl21/audio', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'm4a', + 'format_id': 'AUDIO-1', + 'format_note': 'DASH audio', + 'container': 'm4a_dash', + 'acodec': 'mp4a.40.2', + 'vcodec': 'none', + 'tbr': 129.87, + 'asr': 48000, + + }, { + 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'mp4', + 'format_id': 'VIDEO-2', + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'acodec': 'none', + 'vcodec': 'avc1.4d401e', + 'tbr': 608.0, + 'width': 240, + 'height': 240, + 'fps': 30, + }, { + 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360', + 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', + 'ext': 'mp4', + 'format_id': 'VIDEO-1', + 'format_note': 'DASH video', + 'container': 'mp4_dash', + 'acodec': 'none', + 'vcodec': 'avc1.4d401e', + 'tbr': 804.261, + 'width': 360, + 'height': 360, + 'fps': 30, + }] + ) + ] + + for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES: + with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_mpd_formats( + compat_etree_fromstring(f.read().encode('utf-8')), + mpd_base_url=mpd_base_url, mpd_url=mpd_url) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + + def test_parse_f4m_formats(self): + _TEST_CASES = [ + ( + # https://github.com/ytdl-org/youtube-dl/issues/14660 + 'custom_base_url', + 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + [{ + 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', + 'ext': 'flv', + 'format_id': '2148', + 'protocol': 'f4m', + 'tbr': 2148, + 'width': 1280, + 'height': 720, + }] + ), + ] + + for f4m_file, f4m_url, expected_formats in _TEST_CASES: + with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, + mode='r', encoding='utf-8') as f: + formats = self.ie._parse_f4m_formats( + compat_etree_fromstring(f.read().encode('utf-8')), + f4m_url, None) + self.ie._sort_formats(formats) + expect_value(self, formats, expected_formats, None) + + def test_parse_xspf(self): + _TEST_CASES = [ + ( + 'foo_xspf', + 'https://example.org/src/foo_xspf.xspf', + [{ + 'id': 'foo_xspf', + 'title': 'Pandemonium', + 'description': 
'Visit http://bigbrother404.bandcamp.com', + 'duration': 202.416, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/cd1/track%201.mp3', + }], + }, { + 'id': 'foo_xspf', + 'title': 'Final Cartridge (Nichico Twelve Remix)', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 255.857, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3', + }], + }, { + 'id': 'foo_xspf', + 'title': 'Rebuilding Nightingale', + 'description': 'Visit http://bigbrother404.bandcamp.com', + 'duration': 287.915, + 'formats': [{ + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.org/src/track3.mp3', + }, { + 'manifest_url': 'https://example.org/src/foo_xspf.xspf', + 'url': 'https://example.com/track3.mp3', + }] + }] + ), + ] + + for xspf_file, xspf_url, expected_entries in _TEST_CASES: + with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, + mode='r', encoding='utf-8') as f: + entries = self.ie._parse_xspf( + compat_etree_fromstring(f.read().encode('utf-8')), + xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) + expect_value(self, entries, expected_entries, None) + for i in range(len(entries)): + expect_dict(self, entries[i], expected_entries[i]) + + def test_response_with_expected_status_returns_content(self): + # Checks for mitigations against the effects of + # that affect Python 3.4.1+, which + # manifest as `_download_webpage`, `_download_xml`, `_download_json`, + # or the underlying `_download_webpage_handle` returning no content + # when a response matches `expected_status`. + + httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + port = http_server_port(httpd) + server_thread = threading.Thread(target=httpd.serve_forever) + server_thread.daemon = True + server_thread.start() + + (content, urlh) = self.ie._download_webpage_handle( + 'http://127.0.0.1:%d/teapot' % port, None, + expected_status=TEAPOT_RESPONSE_STATUS) + self.assertEqual(content, TEAPOT_RESPONSE_BODY) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py new file mode 100644 index 000000000..ce9666171 --- /dev/null +++ b/test/test_YoutubeDL.py @@ -0,0 +1,904 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +import copy + +from test.helper import FakeYDL, assertRegexpMatches +from youtube_dl import YoutubeDL +from youtube_dl.compat import compat_str, compat_urllib_error +from youtube_dl.extractor import YoutubeIE +from youtube_dl.extractor.common import InfoExtractor +from youtube_dl.postprocessor.common import PostProcessor +from youtube_dl.utils import ExtractorError, match_filter_func + +TEST_URL = 'http://localhost/sample.mp4' + + +class YDL(FakeYDL): + def __init__(self, *args, **kwargs): + super(YDL, self).__init__(*args, **kwargs) + self.downloaded_info_dicts = [] + self.msgs = [] + + def process_info(self, info_dict): + self.downloaded_info_dicts.append(info_dict) + + def to_screen(self, msg): + self.msgs.append(msg) + + +def _make_result(formats, **kwargs): + res = { + 'formats': formats, + 'id': 'testid', + 'title': 'testttitle', + 'extractor': 'testex', + 'extractor_key': 'TestEx', + } + res.update(**kwargs) + return res + + 
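+# A minimal sketch of the pattern used by the tests below (illustrative only,
+# not a case asserted by the suite): build a synthetic info dict with
+# _make_result, feed it to a YDL configured with a format spec, and inspect
+# which format would have been downloaded.
+def _example_height_capped_selection():
+    formats = [
+        {'format_id': 'low', 'ext': 'mp4', 'height': 360, 'url': TEST_URL},
+        {'format_id': 'high', 'ext': 'mp4', 'height': 720, 'url': TEST_URL},
+    ]
+    ydl = YDL({'format': 'best[height<=480]'})
+    ydl.process_ie_result(_make_result(formats))
+    # Only 'low' passes the height filter, so it is the selected format.
+    return ydl.downloaded_info_dicts[0]['format_id']
+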
+class TestFormatSelection(unittest.TestCase): + def test_prefer_free_formats(self): + # Same resolution => download webm + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {'ext': 'webm', 'height': 460, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 460, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'webm') + + # Different resolution => download best quality (mp4) + ydl = YDL() + ydl.params['prefer_free_formats'] = True + formats = [ + {'ext': 'webm', 'height': 720, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 1080, 'url': TEST_URL}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'mp4') + + # No prefer_free_formats => prefer mp4 and flv for greater compatibility + ydl = YDL() + ydl.params['prefer_free_formats'] = False + formats = [ + {'ext': 'webm', 'height': 720, 'url': TEST_URL}, + {'ext': 'mp4', 'height': 720, 'url': TEST_URL}, + {'ext': 'flv', 'height': 720, 'url': TEST_URL}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'mp4') + + ydl = YDL() + ydl.params['prefer_free_formats'] = False + formats = [ + {'ext': 'flv', 'height': 720, 'url': TEST_URL}, + {'ext': 'webm', 'height': 720, 'url': TEST_URL}, + ] + info_dict['formats'] = formats + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['ext'], 'flv') + + def test_format_selection(self): + formats = [ + {'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, + {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL}, + {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL}, + {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL}, + {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': '20/47'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '47') + + ydl = YDL({'format': '20/71/worst'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '35') + + ydl = YDL() + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '2') + + ydl = YDL({'format': 'webm/mp4'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '47') + + ydl = YDL({'format': '3gp/40/mp4'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '35') + + ydl = YDL({'format': 'example-with-dashes'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'example-with-dashes') + + def test_format_selection_audio(self): + formats = [ + {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 
'none', 'url': TEST_URL}, + {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestaudio'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'audio-high') + + ydl = YDL({'format': 'worstaudio'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'audio-low') + + formats = [ + {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL}, + {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestaudio/worstaudio/best'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vid-high') + + def test_format_selection_audio_exts(self): + formats = [ + {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, + {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'}, + ] + + info_dict = _make_result(formats) + ydl = YDL({'format': 'best'}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'aac-64') + + ydl = YDL({'format': 'mp3'}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'mp3-64') + + ydl = YDL({'prefer_free_formats': True}) + ie = YoutubeIE(ydl) + ie._sort_formats(info_dict['formats']) + ydl.process_ie_result(copy.deepcopy(info_dict)) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'ogg-64') + + def test_format_selection_video(self): + formats = [ + {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'bestvideo'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'dash-video-high') + + ydl = YDL({'format': 'worstvideo'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'dash-video-low') + + ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'dash-video-low') + + formats = [ + {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl 
= YDL({'format': 'bestvideo[vcodec=avc1.123456]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot') + + def test_format_selection_string_ops(self): + formats = [ + {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, + {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # equals (=) + ydl = YDL({'format': '[format_id=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not equal (!=) + ydl = YDL({'format': '[format_id!=abc-cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # starts with (^=) + ydl = YDL({'format': '[format_id^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not start with (!^=) + ydl = YDL({'format': '[format_id!^=abc]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # ends with ($=) + ydl = YDL({'format': '[format_id$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not end with (!$=) + ydl = YDL({'format': '[format_id!$=cba]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + # contains (*=) + ydl = YDL({'format': '[format_id*=bc-cb]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'abc-cba') + + # does not contain (!*=) + ydl = YDL({'format': '[format_id!*=bc-cb]'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'zxc-cxz') + + ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + ydl = YDL({'format': '[format_id!*=-]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + def test_youtube_format_selection(self): + order = [ + '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13', + # Apple HTTP Live Streaming + '96', '95', '94', '93', '92', '132', '151', + # 3D + '85', '84', '102', '83', '101', '82', '100', + # Dash video + '137', '248', '136', '247', '135', '246', + '245', '244', '134', '243', '133', '242', '160', + # Dash audio + '141', '172', '140', '171', '139', + ] + + def format_info(f_id): + info = YoutubeIE._formats[f_id].copy() + + # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec' + # and 'vcodec', while in tests such information is incomplete since + # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593 + # 
test_YoutubeDL.test_youtube_format_selection is broken without + # this fix + if 'acodec' in info and 'vcodec' not in info: + info['vcodec'] = 'none' + elif 'vcodec' in info and 'acodec' not in info: + info['acodec'] = 'none' + + info['format_id'] = f_id + info['url'] = 'url:' + f_id + return info + formats_order = [format_info(f_id) for f_id in order] + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': 'bestvideo+bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '137+141') + self.assertEqual(downloaded['ext'], 'mp4') + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], '38') + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': 'bestvideo/best,bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['137', '141']) + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['137+141', '248+141']) + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['136+141', '247+141']) + + info_dict = _make_result(list(formats_order), extractor='youtube') + ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['248+141']) + + for f1, f2 in zip(formats_order, formats_order[1:]): + info_dict = _make_result([f1, f2], extractor='youtube') + ydl = YDL({'format': 'best/bestvideo'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], f1['format_id']) + + info_dict = _make_result([f2, f1], extractor='youtube') + ydl = YDL({'format': 'best/bestvideo'}) + yie = YoutubeIE(ydl) + yie._sort_formats(info_dict['formats']) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], f1['format_id']) + + def test_audio_only_extractor_format_selection(self): + # For extractors with incomplete formats (all formats are audio-only or + # video-only) best and worst should fall back to corresponding best/worst + # video-only or audio-only formats (as per + # https://github.com/ytdl-org/youtube-dl/pull/5556) + formats = [ + 
{'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, + {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'best'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'high') + + ydl = YDL({'format': 'worst'}) + ydl.process_ie_result(info_dict.copy()) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'low') + + def test_format_not_available(self): + formats = [ + {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}, + {'format_id': 'video', 'ext': 'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + # This must fail since complete video-audio format does not match filter + # and extractor does not provide incomplete only formats (i.e. only + # video-only or audio-only). + ydl = YDL({'format': 'best[height>360]'}) + self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) + + def test_format_selection_issue_10083(self): + # See https://github.com/ytdl-org/youtube-dl/issues/10083 + formats = [ + {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, + {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, + {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL}, + ] + info_dict = _make_result(formats) + + ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'}) + ydl.process_ie_result(info_dict.copy()) + self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio') + + def test_invalid_format_specs(self): + def assert_syntax_error(format_spec): + ydl = YDL({'format': format_spec}) + info_dict = _make_result([{'format_id': 'foo', 'url': TEST_URL}]) + self.assertRaises(SyntaxError, ydl.process_ie_result, info_dict) + + assert_syntax_error('bestvideo,,best') + assert_syntax_error('+bestaudio') + assert_syntax_error('bestvideo+') + assert_syntax_error('/') + + def test_format_filtering(self): + formats = [ + {'format_id': 'A', 'filesize': 500, 'width': 1000}, + {'format_id': 'B', 'filesize': 1000, 'width': 500}, + {'format_id': 'C', 'filesize': 1000, 'width': 400}, + {'format_id': 'D', 'filesize': 2000, 'width': 600}, + {'format_id': 'E', 'filesize': 3000}, + {'format_id': 'F'}, + {'format_id': 'G', 'filesize': 1000000}, + ] + for f in formats: + f['url'] = 'http://_/' + f['ext'] = 'unknown' + info_dict = _make_result(formats) + + ydl = YDL({'format': 'best[filesize<3000]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'D') + + ydl = YDL({'format': 'best[filesize<=3000]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'E') + + ydl = YDL({'format': 'best[filesize <= ? 
3000]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'F') + + ydl = YDL({'format': 'best [filesize = 1000] [width>450]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'B') + + ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'C') + + ydl = YDL({'format': '[filesize>?1]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'G') + + ydl = YDL({'format': '[filesize<1M]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'E') + + ydl = YDL({'format': '[filesize<1MiB]'}) + ydl.process_ie_result(info_dict) + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['format_id'], 'G') + + ydl = YDL({'format': 'all[width>=400][width<=600]'}) + ydl.process_ie_result(info_dict) + downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] + self.assertEqual(downloaded_ids, ['B', 'C', 'D']) + + ydl = YDL({'format': 'best[height<40]'}) + try: + ydl.process_ie_result(info_dict) + except ExtractorError: + pass + self.assertEqual(ydl.downloaded_info_dicts, []) + + def test_default_format_spec(self): + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({}), 'bestvideo+bestaudio/best') + + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') + + ydl = YDL({'simulate': True}) + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'bestvideo+bestaudio/best') + + ydl = YDL({'outtmpl': '-'}) + self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') + + ydl = YDL({}) + self.assertEqual(ydl._default_format_spec({}, download=False), 'bestvideo+bestaudio/best') + self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') + + +class TestYoutubeDL(unittest.TestCase): + def test_subtitles(self): + def s_formats(lang, autocaption=False): + return [{ + 'ext': ext, + 'url': 'http://localhost/video.%s.%s' % (lang, ext), + '_auto': autocaption, + } for ext in ['vtt', 'srt', 'ass']] + subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es']) + auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es']) + info_dict = { + 'id': 'test', + 'title': 'Test', + 'url': 'http://localhost/video.mp4', + 'subtitles': subtitles, + 'automatic_captions': auto_captions, + 'extractor': 'TEST', + } + + def get_info(params={}): + params.setdefault('simulate', True) + ydl = YDL(params) + ydl.report_warning = lambda *args, **kargs: None + return ydl.process_video_result(info_dict, download=False) + + result = get_info() + self.assertFalse(result.get('requested_subtitles')) + self.assertEqual(result['subtitles'], subtitles) + self.assertEqual(result['automatic_captions'], auto_captions) + + result = get_info({'writesubtitles': True}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), set(['en'])) + self.assertTrue(subs['en'].get('data') is None) + self.assertEqual(subs['en']['ext'], 'ass') + + result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'}) + subs = result['requested_subtitles'] + self.assertEqual(subs['en']['ext'], 'srt') + + result = 
get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), set(['es', 'fr'])) + + result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), set(['es', 'pt'])) + self.assertFalse(subs['es']['_auto']) + self.assertTrue(subs['pt']['_auto']) + + result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) + subs = result['requested_subtitles'] + self.assertTrue(subs) + self.assertEqual(set(subs.keys()), set(['es', 'pt'])) + self.assertTrue(subs['es']['_auto']) + self.assertTrue(subs['pt']['_auto']) + + def test_add_extra_info(self): + test_dict = { + 'extractor': 'Foo', + } + extra_info = { + 'extractor': 'Bar', + 'playlist': 'funny videos', + } + YDL.add_extra_info(test_dict, extra_info) + self.assertEqual(test_dict['extractor'], 'Foo') + self.assertEqual(test_dict['playlist'], 'funny videos') + + def test_prepare_filename(self): + info = { + 'id': '1234', + 'ext': 'mp4', + 'width': None, + 'height': 1080, + 'title1': '$PATH', + 'title2': '%PATH%', + } + + def fname(templ): + ydl = YoutubeDL({'outtmpl': templ}) + return ydl.prepare_filename(info) + self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4') + self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4') + # Replace missing fields with 'NA' + self.assertEqual(fname('%(uploader_date)s-%(id)s.%(ext)s'), 'NA-1234.mp4') + self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4') + self.assertEqual(fname('%(height)6d.%(ext)s'), ' 1080.mp4') + self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080 .mp4') + self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4') + self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%(height) 0 6d.%(ext)s'), ' 01080.mp4') + self.assertEqual(fname('%%'), '%') + self.assertEqual(fname('%%%%'), '%%') + self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4') + self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4') + self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s') + self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4') + self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH') + self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%') + + def test_format_note(self): + ydl = YoutubeDL() + self.assertEqual(ydl._format_note({}), '') + assertRegexpMatches(self, ydl._format_note({ + 'vbr': 10, + }), r'^\s*10k$') + assertRegexpMatches(self, ydl._format_note({ + 'fps': 30, + }), r'^30fps$') + + def test_postprocessors(self): + filename = 'post-processor-testfile.mp4' + audiofile = filename + '.mp3' + + class SimplePP(PostProcessor): + def run(self, info): + with open(audiofile, 'wt') as f: + f.write('EXAMPLE') + return [info['filepath']], info + + def run_pp(params, PP): + with open(filename, 'wt') as f: + f.write('EXAMPLE') + ydl = YoutubeDL(params) + ydl.add_post_processor(PP()) + ydl.post_process(filename, {'filepath': filename}) + + run_pp({'keepvideo': True}, SimplePP) + self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) + self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + os.unlink(filename) + 
os.unlink(audiofile) + + run_pp({'keepvideo': False}, SimplePP) + self.assertFalse(os.path.exists(filename), '%s exists' % filename) + self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile) + os.unlink(audiofile) + + class ModifierPP(PostProcessor): + def run(self, info): + with open(info['filepath'], 'wt') as f: + f.write('MODIFIED') + return [], info + + run_pp({'keepvideo': False}, ModifierPP) + self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename) + os.unlink(filename) + + def test_match_filter(self): + class FilterYDL(YDL): + def __init__(self, *args, **kwargs): + super(FilterYDL, self).__init__(*args, **kwargs) + self.params['simulate'] = True + + def process_info(self, info_dict): + super(YDL, self).process_info(info_dict) + + def _match_entry(self, info_dict, incomplete): + res = super(FilterYDL, self)._match_entry(info_dict, incomplete) + if res is None: + self.downloaded_info_dicts.append(info_dict) + return res + + first = { + 'id': '1', + 'url': TEST_URL, + 'title': 'one', + 'extractor': 'TEST', + 'duration': 30, + 'filesize': 10 * 1024, + 'playlist_id': '42', + 'uploader': "變態妍字幕版 太妍 тест", + 'creator': "тест ' 123 ' тест--", + } + second = { + 'id': '2', + 'url': TEST_URL, + 'title': 'two', + 'extractor': 'TEST', + 'duration': 10, + 'description': 'foo', + 'filesize': 5 * 1024, + 'playlist_id': '43', + 'uploader': "тест 123", + } + videos = [first, second] + + def get_videos(filter_=None): + ydl = FilterYDL({'match_filter': filter_}) + for v in videos: + ydl.process_ie_result(v, download=True) + return [v['id'] for v in ydl.downloaded_info_dicts] + + res = get_videos() + self.assertEqual(res, ['1', '2']) + + def f(v): + if v['id'] == '1': + return None + else: + return 'Video id is not 1' + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('duration < 30') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('description = foo') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('description =? 
foo') + res = get_videos(f) + self.assertEqual(res, ['1', '2']) + + f = match_filter_func('filesize > 5KiB') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('playlist_id = 42') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"') + res = get_videos(f) + self.assertEqual(res, ['2']) + + f = match_filter_func('creator = "тест \' 123 \' тест--"') + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func("creator = 'тест \\' 123 \\' тест--'") + res = get_videos(f) + self.assertEqual(res, ['1']) + + f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30") + res = get_videos(f) + self.assertEqual(res, []) + + def test_playlist_items_selection(self): + entries = [{ + 'id': compat_str(i), + 'title': compat_str(i), + 'url': TEST_URL, + } for i in range(1, 5)] + playlist = { + '_type': 'playlist', + 'id': 'test', + 'entries': entries, + 'extractor': 'test:playlist', + 'extractor_key': 'test:playlist', + 'webpage_url': 'http://example.com', + } + + def get_ids(params): + ydl = YDL(params) + # make a copy because the dictionary can be modified + ydl.process_ie_result(playlist.copy()) + return [int(v['id']) for v in ydl.downloaded_info_dicts] + + result = get_ids({}) + self.assertEqual(result, [1, 2, 3, 4]) + + result = get_ids({'playlistend': 10}) + self.assertEqual(result, [1, 2, 3, 4]) + + result = get_ids({'playlistend': 2}) + self.assertEqual(result, [1, 2]) + + result = get_ids({'playliststart': 10}) + self.assertEqual(result, []) + + result = get_ids({'playliststart': 2}) + self.assertEqual(result, [2, 3, 4]) + + result = get_ids({'playlist_items': '2-4'}) + self.assertEqual(result, [2, 3, 4]) + + result = get_ids({'playlist_items': '2,4'}) + self.assertEqual(result, [2, 4]) + + result = get_ids({'playlist_items': '10'}) + self.assertEqual(result, []) + + result = get_ids({'playlist_items': '3-10'}) + self.assertEqual(result, [3, 4]) + + result = get_ids({'playlist_items': '2-4,3-4,3'}) + self.assertEqual(result, [2, 3, 4]) + + def test_urlopen_no_file_protocol(self): + # see https://github.com/ytdl-org/youtube-dl/issues/8227 + ydl = YDL() + self.assertRaises(compat_urllib_error.URLError, ydl.urlopen, 'file:///etc/passwd') + + def test_do_not_override_ie_key_in_url_transparent(self): + ydl = YDL() + + class Foo1IE(InfoExtractor): + _VALID_URL = r'foo1:' + + def _real_extract(self, url): + return { + '_type': 'url_transparent', + 'url': 'foo2:', + 'ie_key': 'Foo2', + 'title': 'foo1 title', + 'id': 'foo1_id', + } + + class Foo2IE(InfoExtractor): + _VALID_URL = r'foo2:' + + def _real_extract(self, url): + return { + '_type': 'url', + 'url': 'foo3:', + 'ie_key': 'Foo3', + } + + class Foo3IE(InfoExtractor): + _VALID_URL = r'foo3:' + + def _real_extract(self, url): + return _make_result([{'url': TEST_URL}], title='foo3 title') + + ydl.add_info_extractor(Foo1IE(ydl)) + ydl.add_info_extractor(Foo2IE(ydl)) + ydl.add_info_extractor(Foo3IE(ydl)) + ydl.extract_info('foo1:') + downloaded = ydl.downloaded_info_dicts[0] + self.assertEqual(downloaded['url'], TEST_URL) + self.assertEqual(downloaded['title'], 'foo1 title') + self.assertEqual(downloaded['id'], 'testid') + self.assertEqual(downloaded['extractor'], 'testex') + self.assertEqual(downloaded['extractor_key'], 'TestEx') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_YoutubeDLCookieJar.py 
b/test/test_YoutubeDLCookieJar.py new file mode 100644 index 000000000..f959798de --- /dev/null +++ b/test/test_YoutubeDLCookieJar.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +import os +import re +import sys +import tempfile +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import YoutubeDLCookieJar + + +class TestYoutubeDLCookieJar(unittest.TestCase): + def test_keep_session_cookies(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + tf = tempfile.NamedTemporaryFile(delete=False) + try: + cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True) + temp = tf.read().decode('utf-8') + self.assertTrue(re.search( + r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp)) + self.assertTrue(re.search( + r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp)) + finally: + tf.close() + os.remove(tf.name) + + def test_strip_httponly_prefix(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + + def assert_cookie_has_value(key): + self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') + + assert_cookie_has_value('HTTPONLY_COOKIE') + assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_aes.py b/test/test_aes.py new file mode 100644 index 000000000..cc89fb6ab --- /dev/null +++ b/test/test_aes.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text +from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes +import base64 + +# the encrypted data can be generated with 'devscripts/generate_aes_testdata.py' + + +class TestAES(unittest.TestCase): + def setUp(self): + self.key = self.iv = [0x20, 0x15] + 14 * [0] + self.secret_msg = b'Secret message goes here' + + def test_encrypt(self): + msg = b'message' + key = list(range(16)) + encrypted = aes_encrypt(bytes_to_intlist(msg), key) + decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) + self.assertEqual(decrypted, msg) + + def test_cbc_decrypt(self): + data = bytes_to_intlist( + b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd" + ) + decrypted = intlist_to_bytes(aes_cbc_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + + def test_cbc_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) + self.assertEqual( + encrypted, + b"\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd") + + def test_decrypt_text(self): + password = intlist_to_bytes(self.key).decode('utf-8') + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + ).decode('utf-8') + decrypted = (aes_decrypt_text(encrypted, password, 16)) + self.assertEqual(decrypted, self.secret_msg) + + password = 
intlist_to_bytes(self.key).decode('utf-8') + encrypted = base64.b64encode( + intlist_to_bytes(self.iv[:8]) + + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' + ).decode('utf-8') + decrypted = (aes_decrypt_text(encrypted, password, 32)) + self.assertEqual(decrypted, self.secret_msg) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py new file mode 100644 index 000000000..6f5513faa --- /dev/null +++ b/test/test_age_restriction.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import try_rm + + +from youtube_dl import YoutubeDL + + +def _download_restricted(url, filename, age): + """ Returns true if the file has been downloaded """ + + params = { + 'age_limit': age, + 'skip_download': True, + 'writeinfojson': True, + 'outtmpl': '%(id)s.%(ext)s', + } + ydl = YoutubeDL(params) + ydl.add_default_info_extractors() + json_filename = os.path.splitext(filename)[0] + '.info.json' + try_rm(json_filename) + ydl.download([url]) + res = os.path.exists(json_filename) + try_rm(json_filename) + return res + + +class TestAgeRestriction(unittest.TestCase): + def _assert_restricted(self, url, filename, age, old_age=None): + self.assertTrue(_download_restricted(url, filename, old_age)) + self.assertFalse(_download_restricted(url, filename, age)) + + def test_youtube(self): + self._assert_restricted('07FYdnEawAQ', '07FYdnEawAQ.mp4', 10) + + def test_youporn(self): + self._assert_restricted( + 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/', + '505835.mp4', 2, old_age=25) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_all_urls.py b/test/test_all_urls.py new file mode 100644 index 000000000..465ce0050 --- /dev/null +++ b/test/test_all_urls.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +import collections +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import gettestcases + +from youtube_dl.extractor import ( + FacebookIE, + gen_extractors, + YoutubeIE, +) + + +class TestAllURLsMatching(unittest.TestCase): + def setUp(self): + self.ies = gen_extractors() + + def matching_ies(self, url): + return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] + + def assertMatch(self, url, ie_list): + self.assertEqual(self.matching_ies(url), ie_list) + + def test_youtube_playlist_matching(self): + assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) + assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585 + assertPlaylist('PL63F0C78739B09958') + assertPlaylist('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') + assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') + assertPlaylist('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') + assertPlaylist('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 + self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) + # Top tracks + assertPlaylist('https://www.youtube.com/playlist?list=MCUS.20142101') + + def test_youtube_matching(self): + 
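+ # 'PLtS2H6bU1M' is only 11 characters, so it matches the video ID shape + # even though it starts with 'PL'; watch URLs that carry a playlist + # parameter are left to the youtube:playlist extractor instead. +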
self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) + self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668 + self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) + self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) + self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) + + def test_youtube_channel_matching(self): + assertChannel = lambda url: self.assertMatch(url, ['youtube:channel']) + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') + assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') + + def test_youtube_user_matching(self): + self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:user']) + + def test_youtube_feeds(self): + self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:watchlater']) + self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:subscriptions']) + self.assertMatch('https://www.youtube.com/feed/recommended', ['youtube:recommended']) + self.assertMatch('https://www.youtube.com/my_favorites', ['youtube:favorites']) + + def test_youtube_show_matching(self): + self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) + + def test_youtube_search_matching(self): + self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) + self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) + + def test_youtube_extract(self): + assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) + assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') + assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') + assertExtractId('BaW_jenozKc', 'BaW_jenozKc') + + def test_facebook_matching(self): + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) + + def test_no_duplicates(self): + ies = gen_extractors() + for tc in gettestcases(include_onlymatching=True): + url = tc['url'] + for ie in ies: + if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): + self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) + else: + self.assertFalse( + ie.suitable(url), + '%s should not match URL %r . That URL belongs to %s.' 
% (type(ie).__name__, url, tc['name'])) + + def test_keywords(self): + self.assertMatch(':ytsubs', ['youtube:subscriptions']) + self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) + self.assertMatch(':ythistory', ['youtube:history']) + + def test_vimeo_matching(self): + self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel']) + self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel']) + self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo']) + self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) + self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) + + # https://github.com/ytdl-org/youtube-dl/issues/1930 + def test_soundcloud_not_matching_sets(self): + self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) + + def test_tumblr(self): + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr']) + self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) + + def test_pbs(self): + # https://github.com/ytdl-org/youtube-dl/issues/2350 + self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) + self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) + + def test_yahoo_https(self): + # https://github.com/ytdl-org/youtube-dl/issues/2701 + self.assertMatch( + 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', + ['Yahoo']) + + def test_no_duplicated_ie_names(self): + name_accu = collections.defaultdict(list) + for ie in self.ies: + name_accu[ie.IE_NAME.lower()].append(type(ie).__name__) + for (ie_name, ie_list) in name_accu.items(): + self.assertEqual( + len(ie_list), 1, + 'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list))) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_cache.py b/test/test_cache.py new file mode 100644 index 000000000..a16160142 --- /dev/null +++ b/test/test_cache.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +import shutil + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from test.helper import FakeYDL +from youtube_dl.cache import Cache + + +def _is_empty(d): + return not bool(os.listdir(d)) + + +def _mkdir(d): + if not os.path.exists(d): + os.mkdir(d) + + +class TestCache(unittest.TestCase): + def setUp(self): + TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata') + _mkdir(TESTDATA_DIR) + self.test_dir = os.path.join(TESTDATA_DIR, 'cache_test') + self.tearDown() + + def tearDown(self): + if os.path.exists(self.test_dir): + shutil.rmtree(self.test_dir) + + def test_cache(self): + ydl = FakeYDL({ + 'cachedir': self.test_dir, + }) + c = Cache(ydl) + obj = {'x': 1, 'y': ['ä', '\\a', True]} + self.assertEqual(c.load('test_cache', 'k.'), None) + c.store('test_cache', 'k.', obj) + self.assertEqual(c.load('test_cache', 'k2'), None) + self.assertFalse(_is_empty(self.test_dir)) + self.assertEqual(c.load('test_cache', 'k.'), obj) + self.assertEqual(c.load('test_cache', 'y'), None) + self.assertEqual(c.load('test_cache2', 'k.'), None) + c.remove() + self.assertFalse(os.path.exists(self.test_dir)) + 
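+ # after remove(), the cache directory is gone and a previously stored + # entry misses again +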
self.assertEqual(c.load('test_cache', 'k.'), None) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_compat.py b/test/test_compat.py new file mode 100644 index 000000000..86ff389fd --- /dev/null +++ b/test/test_compat.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +from youtube_dl.compat import ( + compat_getenv, + compat_setenv, + compat_etree_Element, + compat_etree_fromstring, + compat_expanduser, + compat_shlex_split, + compat_str, + compat_struct_unpack, + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, +) + + +class TestCompat(unittest.TestCase): + def test_compat_getenv(self): + test_str = 'тест' + compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str) + self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str) + + def test_compat_setenv(self): + test_var = 'YOUTUBE_DL_COMPAT_SETENV' + test_str = 'тест' + compat_setenv(test_var, test_str) + compat_getenv(test_var) + self.assertEqual(compat_getenv(test_var), test_str) + + def test_compat_expanduser(self): + old_home = os.environ.get('HOME') + test_str = r'C:\Documents and Settings\тест\Application Data' + compat_setenv('HOME', test_str) + self.assertEqual(compat_expanduser('~'), test_str) + compat_setenv('HOME', old_home or '') + + def test_all_present(self): + import youtube_dl.compat + all_names = youtube_dl.compat.__all__ + present_names = set(filter( + lambda c: '_' in c and not c.startswith('_'), + dir(youtube_dl.compat))) - set(['unicode_literals']) + self.assertEqual(all_names, sorted(present_names)) + + def test_compat_urllib_parse_unquote(self): + self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') + self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def') + self.assertEqual(compat_urllib_parse_unquote(''), '') + self.assertEqual(compat_urllib_parse_unquote('%'), '%') + self.assertEqual(compat_urllib_parse_unquote('%%'), '%%') + self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%') + self.assertEqual(compat_urllib_parse_unquote('%2F'), '/') + self.assertEqual(compat_urllib_parse_unquote('%2f'), '/') + self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波') + self.assertEqual( + compat_urllib_parse_unquote('''<meta property="og:image" content="http://www.example.com/whatever/%E6%B4%A5%E6%B3%A2.jpg" /> +%%a'''), + '''<meta property="og:image" content="http://www.example.com/whatever/津波.jpg" /> +%%a''') + self.assertEqual( + compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''), + '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''') + + def test_compat_urllib_parse_unquote_plus(self): + self.assertEqual(compat_urllib_parse_unquote_plus('abc%20def'), 'abc def') + self.assertEqual(compat_urllib_parse_unquote_plus('%7e/abc+def'), '~/abc def') + + def test_compat_urllib_parse_urlencode(self): + self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 
'abc=def') + self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') + + def test_compat_shlex_split(self): + self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) + self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) + self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) + + def test_compat_etree_Element(self): + try: + compat_etree_Element.items + except AttributeError: + self.fail('compat_etree_Element is not a type') + + def test_compat_etree_fromstring(self): + xml = ''' + <root foo="bar" spam="中文"> + <normal>foo</normal> + <chinese>中文</chinese> + <foo><bar>spam</bar></foo> + </root> + ''' + doc = compat_etree_fromstring(xml.encode('utf-8')) + self.assertTrue(isinstance(doc.attrib['foo'], compat_str)) + self.assertTrue(isinstance(doc.attrib['spam'], compat_str)) + self.assertTrue(isinstance(doc.find('normal').text, compat_str)) + self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) + self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + + def test_compat_etree_fromstring_doctype(self): + xml = '''<?xml version="1.0"?> +<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd"> +<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>''' + compat_etree_fromstring(xml) + + def test_struct_unpack(self): + self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_download.py b/test/test_download.py new file mode 100644 index 000000000..ebe820dfc --- /dev/null +++ b/test/test_download.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import ( + assertGreaterEqual, + expect_warnings, + get_params, + gettestcases, + expect_info_dict, + try_rm, + report_warning, +) + + +import hashlib +import io +import json +import socket + +import youtube_dl.YoutubeDL +from youtube_dl.compat import ( + compat_http_client, + compat_urllib_error, + compat_HTTPError, +) +from youtube_dl.utils import ( + DownloadError, + ExtractorError, + format_bytes, + UnavailableVideoError, +) +from youtube_dl.extractor import get_info_extractor + +RETRIES = 3 + + +class YoutubeDL(youtube_dl.YoutubeDL): + def __init__(self, *args, **kwargs): + self.to_stderr = self.to_screen + self.processed_info_dicts = [] + super(YoutubeDL, self).__init__(*args, **kwargs) + + def report_warning(self, message): + # Don't accept warnings during tests + raise ExtractorError(message) + + def process_info(self, info_dict): + self.processed_info_dicts.append(info_dict) + return super(YoutubeDL, self).process_info(info_dict) + + +def _file_md5(fn): + with open(fn, 'rb') as f: + return hashlib.md5(f.read()).hexdigest() + + +defs = gettestcases() + + +class TestDownload(unittest.TestCase): + # Parallel testing in nosetests. 
See + # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html + _multiprocess_shared_ = True + + maxDiff = None + + def __str__(self): + """Identify each test with the `add_ie` attribute, if available.""" + + def strclass(cls): + """From 2.7's unittest; 2.6 had _strclass so we can't import it.""" + return '%s.%s' % (cls.__module__, cls.__name__) + + add_ie = getattr(self, self._testMethodName).add_ie + return '%s (%s)%s:' % (self._testMethodName, + strclass(self.__class__), + ' [%s]' % add_ie if add_ie else '') + + def setUp(self): + self.defs = defs + +# Dynamically generate tests + + +def generator(test_case, tname): + + def test_template(self): + ie = youtube_dl.extractor.get_info_extractor(test_case['name'])() + other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])] + is_playlist = any(k.startswith('playlist') for k in test_case) + test_cases = test_case.get( + 'playlist', [] if is_playlist else [test_case]) + + def print_skipping(reason): + print('Skipping %s: %s' % (test_case['name'], reason)) + if not ie.working(): + print_skipping('IE marked as not _WORKING') + return + + for tc in test_cases: + info_dict = tc.get('info_dict', {}) + if not (info_dict.get('id') and info_dict.get('ext')): + raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?') + + if 'skip' in test_case: + print_skipping(test_case['skip']) + return + for other_ie in other_ies: + if not other_ie.working(): + print_skipping('test depends on %sIE, marked as not WORKING' % other_ie.ie_key()) + return + + params = get_params(test_case.get('params', {})) + params['outtmpl'] = tname + '_' + params['outtmpl'] + if is_playlist and 'playlist' not in test_case: + params.setdefault('extract_flat', 'in_playlist') + params.setdefault('skip_download', True) + + ydl = YoutubeDL(params, auto_init=False) + ydl.add_default_info_extractors() + finished_hook_called = set() + + def _hook(status): + if status['status'] == 'finished': + finished_hook_called.add(status['filename']) + ydl.add_progress_hook(_hook) + expect_warnings(ydl, test_case.get('expected_warnings', [])) + + def get_tc_filename(tc): + return ydl.prepare_filename(tc.get('info_dict', {})) + + res_dict = None + + def try_rm_tcs_files(tcs=None): + if tcs is None: + tcs = test_cases + for tc in tcs: + tc_filename = get_tc_filename(tc) + try_rm(tc_filename) + try_rm(tc_filename + '.part') + try_rm(os.path.splitext(tc_filename)[0] + '.info.json') + try_rm_tcs_files() + try: + try_num = 1 + while True: + try: + # We're not using .download here since that is just a shim + # for outside error handling, and returns the exit code + # instead of the result dict. + res_dict = ydl.extract_info( + test_case['url'], + force_generic_extractor=params.get('force_generic_extractor', False)) + except (DownloadError, ExtractorError) as err: + # Check if the exception is not a network related one + if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): + raise + + if try_num == RETRIES: + report_warning('%s failed due to network errors, skipping...' 
% tname) + return + + print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) + + try_num += 1 + else: + break + + if is_playlist: + self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video']) + self.assertTrue('entries' in res_dict) + expect_info_dict(self, res_dict, test_case.get('info_dict', {})) + + if 'playlist_mincount' in test_case: + assertGreaterEqual( + self, + len(res_dict['entries']), + test_case['playlist_mincount'], + 'Expected at least %d in playlist %s, but got only %d' % ( + test_case['playlist_mincount'], test_case['url'], + len(res_dict['entries']))) + if 'playlist_count' in test_case: + self.assertEqual( + len(res_dict['entries']), + test_case['playlist_count'], + 'Expected %d entries in playlist %s, but got %d.' % ( + test_case['playlist_count'], + test_case['url'], + len(res_dict['entries']), + )) + if 'playlist_duration_sum' in test_case: + got_duration = sum(e['duration'] for e in res_dict['entries']) + self.assertEqual( + test_case['playlist_duration_sum'], got_duration) + + # Generalize both playlists and single videos to unified format for + # simplicity + if 'entries' not in res_dict: + res_dict['entries'] = [res_dict] + + for tc_num, tc in enumerate(test_cases): + tc_res_dict = res_dict['entries'][tc_num] + # First, check test cases' data against extracted data alone + expect_info_dict(self, tc_res_dict, tc.get('info_dict', {})) + # Now, check downloaded file consistency + tc_filename = get_tc_filename(tc) + if not test_case.get('params', {}).get('skip_download', False): + self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename) + self.assertTrue(tc_filename in finished_hook_called) + expected_minsize = tc.get('file_minsize', 10000) + if expected_minsize is not None: + if params.get('test'): + expected_minsize = max(expected_minsize, 10000) + got_fsize = os.path.getsize(tc_filename) + assertGreaterEqual( + self, got_fsize, expected_minsize, + 'Expected %s to be at least %s, but it\'s only %s ' % + (tc_filename, format_bytes(expected_minsize), + format_bytes(got_fsize))) + if 'md5' in tc: + md5_for_file = _file_md5(tc_filename) + self.assertEqual(tc['md5'], md5_for_file) + # Finally, check test cases' data again but this time against + # extracted data from info JSON file written during processing + info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json' + self.assertTrue( + os.path.exists(info_json_fn), + 'Missing info file %s' % info_json_fn) + with io.open(info_json_fn, encoding='utf-8') as infof: + info_dict = json.load(infof) + expect_info_dict(self, info_dict, tc.get('info_dict', {})) + finally: + try_rm_tcs_files() + if is_playlist and res_dict is not None and res_dict.get('entries'): + # Remove all other files that may have been extracted if the + # extractor returns full results even with extract_flat + res_tcs = [{'info_dict': e} for e in res_dict['entries']] + try_rm_tcs_files(res_tcs) + + return test_template + + +# And add them to TestDownload +for n, test_case in enumerate(defs): + tname = 'test_' + str(test_case['name']) + i = 1 + while hasattr(TestDownload, tname): + tname = 'test_%s_%d' % (test_case['name'], i) + i += 1 + test_method = generator(test_case, tname) + test_method.__name__ = str(tname) + ie_list = test_case.get('add_ie') + test_method.add_ie = ie_list and ','.join(ie_list) + setattr(TestDownload, test_method.__name__, test_method) + del test_method + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py new file mode 
100644 index 000000000..750472281 --- /dev/null +++ b/test/test_downloader_http.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import re +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import http_server_port, try_rm +from youtube_dl import YoutubeDL +from youtube_dl.compat import compat_http_server +from youtube_dl.downloader.http import HttpFD +from youtube_dl.utils import encodeFilename +import threading + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + +TEST_SIZE = 10 * 1024 + + +class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def send_content_range(self, total=None): + range_header = self.headers.get('Range') + start = end = None + if range_header: + mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header) + if mobj: + start = int(mobj.group(1)) + end = int(mobj.group(2)) + valid_range = start is not None and end is not None + if valid_range: + content_range = 'bytes %d-%d' % (start, end) + if total: + content_range += '/%d' % total + self.send_header('Content-Range', content_range) + return (end - start + 1) if valid_range else total + + def serve(self, range=True, content_length=True): + self.send_response(200) + self.send_header('Content-Type', 'video/mp4') + size = TEST_SIZE + if range: + size = self.send_content_range(TEST_SIZE) + if content_length: + self.send_header('Content-Length', size) + self.end_headers() + self.wfile.write(b'#' * size) + + def do_GET(self): + if self.path == '/regular': + self.serve() + elif self.path == '/no-content-length': + self.serve(content_length=False) + elif self.path == '/no-range': + self.serve(range=False) + elif self.path == '/no-range-no-content-length': + self.serve(range=False, content_length=False) + else: + assert False + + +class FakeLogger(object): + def debug(self, msg): + pass + + def warning(self, msg): + pass + + def error(self, msg): + pass + + +class TestHttpFD(unittest.TestCase): + def setUp(self): + self.httpd = compat_http_server.HTTPServer( + ('127.0.0.1', 0), HTTPTestRequestHandler) + self.port = http_server_port(self.httpd) + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + def download(self, params, ep): + params['logger'] = FakeLogger() + ydl = YoutubeDL(params) + downloader = HttpFD(ydl, params) + filename = 'testfile.mp4' + try_rm(encodeFilename(filename)) + self.assertTrue(downloader.real_download(filename, { + 'url': 'http://127.0.0.1:%d/%s' % (self.port, ep), + })) + self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE) + try_rm(encodeFilename(filename)) + + def download_all(self, params): + for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'): + self.download(params, ep) + + def test_regular(self): + self.download_all({}) + + def test_chunked(self): + self.download_all({ + 'http_chunk_size': 1000, + }) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_execution.py b/test/test_execution.py new file mode 100644 index 000000000..11661bb68 --- /dev/null +++ b/test/test_execution.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# coding: utf-8 + +from __future__ import unicode_literals + +import unittest + +import sys +import os +import subprocess +sys.path.insert(0, 
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.utils import encodeArgument + +rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + + +try: + _DEV_NULL = subprocess.DEVNULL +except AttributeError: + _DEV_NULL = open(os.devnull, 'wb') + + +class TestExecution(unittest.TestCase): + def test_import(self): + subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir) + + def test_module_exec(self): + if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution + subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL) + + def test_main_exec(self): + subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + + def test_cmdline_umlauts(self): + p = subprocess.Popen( + [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'], + cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) + _, stderr = p.communicate() + self.assertFalse(stderr) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_http.py b/test/test_http.py new file mode 100644 index 000000000..3ee0a5dda --- /dev/null +++ b/test/test_http.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python +# coding: utf-8 +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from test.helper import http_server_port +from youtube_dl import YoutubeDL +from youtube_dl.compat import compat_http_server, compat_urllib_request +import ssl +import threading + +TEST_DIR = os.path.dirname(os.path.abspath(__file__)) + + +class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_GET(self): + if self.path == '/video.html': + self.send_response(200) + self.send_header('Content-Type', 'text/html; charset=utf-8') + self.end_headers() + self.wfile.write(b'<html><video src="/vid.mp4" /></html>')
\s*', + webpage, 'uploader', default=None) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'uploader': uploader, + 'formats': formats, + } + + +class ABCOTVSClipsIE(InfoExtractor): + IE_NAME = 'abcotvs:clips' + _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)' + _TEST = { + 'url': 'https://clips.abcotvs.com/kabc/video/214814', + 'info_dict': { + 'id': '214814', + 'ext': 'mp4', + 'title': 'SpaceX launch pad explosion destroys rocket, satellite', + 'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b', + 'upload_date': '20160901', + 'timestamp': 1472756695, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0] + title = video_data['title'] + formats = self._extract_m3u8_formats( + video_data['videoURL'].split('?')[0], video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnailURL'), + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': int_or_none(video_data.get('pubDate')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dl/extractor/academicearth.py new file mode 100644 index 000000000..34095501c --- /dev/null +++ b/youtube_dl/extractor/academicearth.py @@ -0,0 +1,41 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class AcademicEarthCourseIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?academicearth\.org/playlists/(?P<id>[^?#/]+)' + IE_NAME = 'AcademicEarth:Course' + _TEST = { + 'url': 'http://academicearth.org/playlists/laws-of-nature/', + 'info_dict': { + 'id': 'laws-of-nature', + 'title': 'Laws of Nature', + 'description': 'Introduce yourself to the laws of nature with these free online college lectures from Yale, Harvard, and MIT.', + }, + 'playlist_count': 3, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + title = self._html_search_regex( + r'<h1 class="playlist-name"[^>]*?>(.*?)</h1>', webpage, 'title') + description = self._html_search_regex( + r'<p class="excerpt"[^>]*?>(.*?)</p>', + webpage, 'description', fatal=False) + urls = re.findall( + r'<li class="lecture-preview">\s*?<a target="_blank" href="([^"]+)">', + webpage) + entries = [self.url_result(u) for u in urls] + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': title, + 'description': description, + 'entries': entries, + } diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py new file mode 100644 index 000000000..b17c792d2 --- /dev/null +++ b/youtube_dl/extractor/acast.py @@ -0,0 +1,135 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import functools + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + clean_html, + float_or_none, + int_or_none, + try_get, + unified_timestamp, + OnDemandPagedList, +) + + +class ACastIE(InfoExtractor): + IE_NAME = 'acast' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?:embed|www)\.)?acast\.com/| + play\.acast\.com/s/ + ) + (?P<channel>[^/]+)/(?P<id>[^/#?]+) + ''' + _TESTS = [{ + 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', + 'md5': '16d936099ec5ca2d5869e3a813ee8dc4', + 'info_dict': { + 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', + 'ext': 'mp3', + 'title': '2. Raggarmordet - Röster ur det förflutna', + 'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4', + 'timestamp': 1477346700, + 'upload_date': '20161024', + 'duration': 2766.602563, + 'creator': 'Anton Berg & Martin Johnson', + 'series': 'Spår', + 'episode': '2. Raggarmordet - Röster ur det förflutna', + } + }, { + 'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015', + 'only_matching': True, + }, { + 'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22', + 'only_matching': True, + }, { + 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', + 'only_matching': True, + }] + + def _real_extract(self, url): + channel, display_id = re.match(self._VALID_URL, url).groups() + s = self._download_json( + 'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id), + display_id) + media_url = s['url'] + if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id): + episode_url = s.get('episodeUrl') + if episode_url: + display_id = episode_url + else: + channel, display_id = re.match(self._VALID_URL, s['link']).groups() + cast_data = self._download_json( + 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), + display_id)['result'] + e = cast_data['episode'] + title = e.get('name') or s['title'] + return { + 'id': compat_str(e['id']), + 'display_id': display_id, + 'url': media_url, + 'title': title, + 'description': e.get('summary') or clean_html(e.get('description') or s.get('description')), + 'thumbnail': e.get('image'), + 'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')), + 'duration': float_or_none(e.get('duration') or s.get('duration')), + 'filesize': int_or_none(e.get('contentLength')), + 'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str), + 'series': try_get(cast_data, lambda x: x['show']['name'], compat_str), + 'season_number': int_or_none(e.get('seasonNumber')), + 'episode': title, + 'episode_number': int_or_none(e.get('episodeNumber')), + }
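+ # ACastIE above resolves an episode in two hops: feeder.acast.com returns the
+ # media URL plus the canonical channel/episode slugs, then the play-api.acast.com
+ # "splash" endpoint supplies the episode and show metadata merged into the result.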
+ + +class ACastChannelIE(InfoExtractor): + IE_NAME = 'acast:channel' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?acast\.com/| + play\.acast\.com/s/ + ) + (?P<id>[^/#?]+) + ''' + _TESTS = [{ + 'url': 'https://www.acast.com/todayinfocus', + 'info_dict': { + 'id': '4efc5294-5385-4847-98bd-519799ce5786', + 'title': 'Today in Focus', + 'description': 'md5:9ba5564de5ce897faeb12963f4537a64', + }, + 'playlist_mincount': 35, + }, { + 'url': 'http://play.acast.com/s/ft-banking-weekly', + 'only_matching': True, + }] + _API_BASE_URL = 'https://play.acast.com/api/' + _PAGE_SIZE = 10 + + @classmethod + def suitable(cls, url): + return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url) + + def _fetch_page(self, channel_slug, page): + casts = self._download_json( + self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page), + channel_slug, note='Download page %d of channel data' % page) + for cast in casts: + yield self.url_result( + 'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']), + 'ACast', cast['id']) + + def _real_extract(self, url): + channel_slug = self._match_id(url) + channel_data = self._download_json( + self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug) + entries = OnDemandPagedList(functools.partial( + self._fetch_page, channel_slug), self._PAGE_SIZE) + return self.playlist_result(entries, compat_str( + channel_data['id']), channel_data['name'], channel_data.get('description')) diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py new file mode 100644 index 000000000..5e7c0724e --- /dev/null +++ b/youtube_dl/extractor/addanime.py @@ -0,0 +1,95 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_str, + compat_urllib_parse_urlencode, + compat_urllib_parse_urlparse, +) +from ..utils import ( + ExtractorError, + qualities, +) + + +class AddAnimeIE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)' + _TESTS = [{ + 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9', + 'md5': '72954ea10bc979ab5e2eb288b21425a0', + 'info_dict': { + 'id': '24MR3YO5SAS9', + 'ext': 'mp4', + 'description': 'One Piece 606', + 'title': 'One Piece 606', + }, + 'skip': 'Video is gone', + }, { + 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + try: + webpage = self._download_webpage(url, video_id) + except ExtractorError as ee: + if not isinstance(ee.cause, compat_HTTPError) or \ + ee.cause.code != 503: + raise + + redir_webpage = ee.cause.read().decode('utf-8') + action = self._search_regex( + r'<form id="skip_btn" method="post" action="(.*?)"', + redir_webpage, 'redirect form') + vc = self._search_regex( + r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>', + redir_webpage, 'redirect vc value') + av = re.search( + r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);', + redir_webpage) + if av is None: + raise ExtractorError('Cannot find redirect math task') + av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3)) + + parsed_url = compat_urllib_parse_urlparse(url) + av_val = av_res + len(parsed_url.netloc) + confirm_url = ( + parsed_url.scheme + '://' + parsed_url.netloc + + action + '?' + + compat_urllib_parse_urlencode({ + 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) + self._download_webpage( + confirm_url, video_id, + note='Confirming after redirect') + webpage = self._download_webpage(url, video_id) + + FORMATS = ('normal', 'hq') + quality = qualities(FORMATS) + formats = [] + for format_id in FORMATS: + rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id) + video_url = self._search_regex(rex, webpage, 'video file URL', + fatal=False) + if not video_url: + continue + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'quality': quality(format_id), + }) + self._sort_formats(formats) + video_title = self._og_search_title(webpage) + video_description = self._og_search_description(webpage) + + return { + '_type': 'video', + 'id': video_id, + 'formats': formats, + 'title': video_title, + 'description': video_description + } diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py new file mode 100644 index 000000000..c95ad2173 --- /dev/null +++ b/youtube_dl/extractor/adn.py @@ -0,0 +1,207 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import binascii +import json +import os +import random + +from .common import InfoExtractor +from ..aes import aes_cbc_decrypt +from ..compat import ( + compat_b64decode, + compat_ord, +) +from ..utils import ( + bytes_to_intlist, + bytes_to_long, + ExtractorError, + float_or_none, + intlist_to_bytes, + long_to_bytes, + pkcs1pad, + strip_or_none, + urljoin, +) + + +class ADNIE(InfoExtractor): + IE_DESC = 'Anime Digital Network' + _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)' + _TEST = { + 'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites', + 'md5': 'e497370d847fd79d9d4c74be55575c7a', + 'info_dict': { + 'id': '7778', + 'ext': 'mp4', + 'title': 'Blue Exorcist - Kyôto Saga - Épisode 1', + 'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5', + } + } + _BASE_URL = 'http://animedigitalnetwork.fr' + _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537) + _POS_ALIGN_MAP = { + 'start': 1, + 'end': 3, + } + _LINE_ALIGN_MAP = { + 'middle': 8, + 'end': 4, + } + + @staticmethod + def _ass_subtitles_timecode(seconds): + return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100) + + def _get_subtitles(self, sub_path, video_id): + if not sub_path: + return None + + enc_subtitles = self._download_webpage( + urljoin(self._BASE_URL, sub_path), + video_id, 'Downloading subtitles location', fatal=False) or '{}' + subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location') + if subtitle_location: + enc_subtitles = self._download_webpage( + urljoin(self._BASE_URL, subtitle_location), + video_id, 'Downloading subtitles data', fatal=False, + headers={'Origin': 'https://animedigitalnetwork.fr'})
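+ # The downloaded payload is base64 text: the first 24 characters decode to the
+ # AES-CBC initialisation vector and the remainder to the ciphertext; the split
+ # is visible in the aes_cbc_decrypt() call below.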
+ if not enc_subtitles: + return None + + # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js + dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( + bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), + bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')), + bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) + )) + subtitles_json = self._parse_json( + dec_subtitles[:-compat_ord(dec_subtitles[-1])].decode(), + None, fatal=False) + if not subtitles_json: + return None + + subtitles = {} + for sub_lang, sub in subtitles_json.items(): + ssa = '''[Script Info] +ScriptType:V4.00 +[V4 Styles] +Format: Name,Fontname,Fontsize,PrimaryColour,SecondaryColour,TertiaryColour,BackColour,Bold,Italic,BorderStyle,Outline,Shadow,Alignment,MarginL,MarginR,MarginV,AlphaLevel,Encoding +Style: Default,Arial,18,16777215,16777215,16777215,0,-1,0,1,1,0,2,20,20,20,0,0 +[Events] +Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text''' + for current in sub: + start, end, text, line_align, position_align = ( + float_or_none(current.get('startTime')), + float_or_none(current.get('endTime')), + current.get('text'), current.get('lineAlign'), + current.get('positionAlign')) + if start is None or end is None or text is None: + continue + alignment = self._POS_ALIGN_MAP.get(position_align, 2) + self._LINE_ALIGN_MAP.get(line_align, 0) + ssa += os.linesep + 'Dialogue: Marked=0,%s,%s,Default,,0,0,0,,%s%s' % ( + self._ass_subtitles_timecode(start), + self._ass_subtitles_timecode(end), + '{\\a%d}' % alignment if alignment != 2 else '', + text.replace('\n', '\\N').replace('<i>', '{\\i1}').replace('</i>', '{\\i0}')) + + if sub_lang == 'vostf': + sub_lang = 'fr' + subtitles.setdefault(sub_lang, []).extend([{ + 'ext': 'json', + 'data': json.dumps(sub), + }, { + 'ext': 'ssa', + 'data': ssa, + }]) + return subtitles + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + player_config = self._parse_json(self._search_regex( + r'playerConfig\s*=\s*({.+});', webpage, + 'player config', default='{}'), video_id, fatal=False) + if not player_config: + config_url = urljoin(self._BASE_URL, self._search_regex( + r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"', + webpage, 'config url')) + player_config = self._download_json( + config_url, video_id, + 'Downloading player config JSON metadata')['player'] + + video_info = {} + video_info_str = self._search_regex( + r'videoInfo\s*=\s*({.+});', webpage, + 'video info', fatal=False) + if video_info_str: + video_info = self._parse_json( + video_info_str, video_id, fatal=False) or {} + + options = player_config.get('options') or {} + metas = options.get('metas') or {} + links = player_config.get('links') or {} + sub_path = player_config.get('subtitles') + error = None + if not links: + links_url = player_config.get('linksurl') or options['videoUrl'] + token = options['token'] + self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)]) + message = bytes_to_intlist(json.dumps({ + 'k': self._K, + 'e': 60, + 't': token, + })) + padded_message = intlist_to_bytes(pkcs1pad(message, 128)) + n, e = self._RSA_KEY + encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n)) + authorization = base64.b64encode(encrypted_message).decode() + links_data = self._download_json( + urljoin(self._BASE_URL, links_url), video_id, + 'Downloading links JSON metadata', headers={ + 'Authorization': 'Bearer ' + authorization, + })
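+ # 'k' in the request above is a random 16-hex-digit session value: presumably the
+ # server encrypts the subtitles with it, since _get_subtitles() reuses it (self._K)
+ # as the first half of the AES key, completed by the fixed suffix '4b8ef13ec1872730'.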
+ links = links_data.get('links') or {} + metas = metas or links_data.get('meta') or {} + sub_path = sub_path or links_data.get('subtitles') or \ + 'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id + sub_path += '&token=' + token + error = links_data.get('error') + title = metas.get('title') or video_info['title'] + + formats = [] + for format_id, qualities in links.items(): + if not isinstance(qualities, dict): + continue + for quality, load_balancer_url in qualities.items(): + load_balancer_data = self._download_json( + load_balancer_url, video_id, + 'Downloading %s %s JSON metadata' % (format_id, quality), + fatal=False) or {} + m3u8_url = load_balancer_data.get('location') + if not m3u8_url: + continue + m3u8_formats = self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=format_id, fatal=False) + if format_id == 'vf': + for f in m3u8_formats: + f['language'] = 'fr' + formats.extend(m3u8_formats) + if not error: + error = options.get('error') + if not formats and error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': strip_or_none(metas.get('summary') or video_info.get('resume')), + 'thumbnail': video_info.get('image'), + 'formats': formats, + 'subtitles': self.extract_subtitles(sub_path, video_id), + 'episode': metas.get('subtitle') or video_info.get('videoTitle'), + 'series': video_info.get('playlistTitle'), + } diff --git a/youtube_dl/extractor/adobeconnect.py b/youtube_dl/extractor/adobeconnect.py new file mode 100644 index 000000000..728549eb9 --- /dev/null +++ b/youtube_dl/extractor/adobeconnect.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) + + +class AdobeConnectIE(InfoExtractor): + _VALID_URL = r'https?://\w+\.adobeconnect\.com/(?P<id>[\w-]+)' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title') + qs = compat_parse_qs(self._search_regex(r"swfUrl\s*=\s*'([^']+)'", webpage, 'swf url').split('?')[1]) + is_live = qs.get('isLive', ['false'])[0] == 'true' + formats = [] + for con_string in qs['conStrings'][0].split(','): + formats.append({ + 'format_id': con_string.split('://')[0], + 'app': compat_urlparse.quote('?'
+ con_string.split('?')[1] + 'flvplayerapp/' + qs['appInstance'][0]), + 'ext': 'flv', + 'play_path': 'mp4:' + qs['streamName'][0], + 'rtmp_conn': 'S:' + qs['ticket'][0], + 'rtmp_live': is_live, + 'url': con_string, + }) + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'formats': formats, + 'is_live': is_live, + } diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py new file mode 100644 index 000000000..38dca1b0a --- /dev/null +++ b/youtube_dl/extractor/adobepass.py @@ -0,0 +1,1572 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import time +import xml.etree.ElementTree as etree + +from .common import InfoExtractor +from ..compat import ( + compat_kwargs, + compat_urlparse, +) +from ..utils import ( + unescapeHTML, + urlencode_postdata, + unified_timestamp, + ExtractorError, + NO_DEFAULT, +) + + +MSO_INFO = { + 'DTV': { + 'name': 'DIRECTV', + 'username_field': 'username', + 'password_field': 'password', + }, + 'ATT': { + 'name': 'AT&T U-verse', + 'username_field': 'userid', + 'password_field': 'password', + }, + 'ATTOTT': { + 'name': 'DIRECTV NOW', + 'username_field': 'email', + 'password_field': 'loginpassword', + }, + 'Rogers': { + 'name': 'Rogers', + 'username_field': 'UserName', + 'password_field': 'UserPassword', + }, + 'Comcast_SSO': { + 'name': 'Comcast XFINITY', + 'username_field': 'user', + 'password_field': 'passwd', + }, + 'TWC': { + 'name': 'Time Warner Cable | Spectrum', + 'username_field': 'Ecom_User_ID', + 'password_field': 'Ecom_Password', + }, + 'Brighthouse': { + 'name': 'Bright House Networks | Spectrum', + 'username_field': 'j_username', + 'password_field': 'j_password', + }, + 'Charter_Direct': { + 'name': 'Charter Spectrum', + 'username_field': 'IDToken1', + 'password_field': 'IDToken2', + }, + 'Verizon': { + 'name': 'Verizon FiOS', + 'username_field': 'IDToken1', + 'password_field': 'IDToken2', + }, + 'thr030': { + 'name': '3 Rivers Communications' + }, + 'com140': { + 'name': 'Access Montana' + }, + 'acecommunications': { + 'name': 'AcenTek' + }, + 'acm010': { + 'name': 'Acme Communications' + }, + 'ada020': { + 'name': 'Adams Cable Service' + }, + 'alb020': { + 'name': 'Albany Mutual Telephone' + }, + 'algona': { + 'name': 'Algona Municipal Utilities' + }, + 'allwest': { + 'name': 'All West Communications' + }, + 'all025': { + 'name': 'Allen\'s Communications' + }, + 'spl010': { + 'name': 'Alliance Communications' + }, + 'all070': { + 'name': 'ALLO Communications' + }, + 'alpine': { + 'name': 'Alpine Communications' + }, + 'hun015': { + 'name': 'American Broadband' + }, + 'nwc010': { + 'name': 'American Broadband Missouri' + }, + 'com130-02': { + 'name': 'American Community Networks' + }, + 'com130-01': { + 'name': 'American Warrior Networks' + }, + 'tom020': { + 'name': 'Amherst Telephone/Tomorrow Valley' + }, + 'tvc020': { + 'name': 'Andycable' + }, + 'arkwest': { + 'name': 'Arkwest Communications' + }, + 'art030': { + 'name': 'Arthur Mutual Telephone Company' + }, + 'arvig': { + 'name': 'Arvig' + }, + 'nttcash010': { + 'name': 'Ashland Home Net' + }, + 'astound': { + 'name': 'Astound (now Wave)' + }, + 'dix030': { + 'name': 'ATC Broadband' + }, + 'ara010': { + 'name': 'ATC Communications' + }, + 'she030-02': { + 'name': 'Ayersville Communications' + }, + 'baldwin': { + 'name': 'Baldwin Lightstream' + }, + 'bal040': { + 'name': 'Ballard TV' + }, + 'cit025': { + 'name': 'Bardstown Cable TV' + }, + 'bay030': { + 'name': 'Bay Country Communications' + }, + 'tel095': { + 
'name': 'Beaver Creek Cooperative Telephone' + }, + 'bea020': { + 'name': 'Beaver Valley Cable' + }, + 'bee010': { + 'name': 'Bee Line Cable' + }, + 'wir030': { + 'name': 'Beehive Broadband' + }, + 'bra020': { + 'name': 'BELD' + }, + 'bel020': { + 'name': 'Bellevue Municipal Cable' + }, + 'vol040-01': { + 'name': 'Ben Lomand Connect / BLTV' + }, + 'bev010': { + 'name': 'BEVCOMM' + }, + 'big020': { + 'name': 'Big Sandy Broadband' + }, + 'ble020': { + 'name': 'Bledsoe Telephone Cooperative' + }, + 'bvt010': { + 'name': 'Blue Valley Tele-Communications' + }, + 'bra050': { + 'name': 'Brandenburg Telephone Co.' + }, + 'bte010': { + 'name': 'Bristol Tennessee Essential Services' + }, + 'annearundel': { + 'name': 'Broadstripe' + }, + 'btc010': { + 'name': 'BTC Communications' + }, + 'btc040': { + 'name': 'BTC Vision - Nahunta' + }, + 'bul010': { + 'name': 'Bulloch Telephone Cooperative' + }, + 'but010': { + 'name': 'Butler-Bremer Communications' + }, + 'tel160-csp': { + 'name': 'C Spire SNAP' + }, + 'csicable': { + 'name': 'Cable Services Inc.' + }, + 'cableamerica': { + 'name': 'CableAmerica' + }, + 'cab038': { + 'name': 'CableSouth Media 3' + }, + 'weh010-camtel': { + 'name': 'Cam-Tel Company' + }, + 'car030': { + 'name': 'Cameron Communications' + }, + 'canbytel': { + 'name': 'Canby Telcom' + }, + 'crt020': { + 'name': 'CapRock Tv' + }, + 'car050': { + 'name': 'Carnegie Cable' + }, + 'cas': { + 'name': 'CAS Cable' + }, + 'casscomm': { + 'name': 'CASSCOMM' + }, + 'mid180-02': { + 'name': 'Catalina Broadband Solutions' + }, + 'cccomm': { + 'name': 'CC Communications' + }, + 'nttccde010': { + 'name': 'CDE Lightband' + }, + 'cfunet': { + 'name': 'Cedar Falls Utilities' + }, + 'dem010-01': { + 'name': 'Celect-Bloomer Telephone Area' + }, + 'dem010-02': { + 'name': 'Celect-Bruce Telephone Area' + }, + 'dem010-03': { + 'name': 'Celect-Citizens Connected Area' + }, + 'dem010-04': { + 'name': 'Celect-Elmwood/Spring Valley Area' + }, + 'dem010-06': { + 'name': 'Celect-Mosaic Telecom' + }, + 'dem010-05': { + 'name': 'Celect-West WI Telephone Area' + }, + 'net010-02': { + 'name': 'Cellcom/Nsight Telservices' + }, + 'cen100': { + 'name': 'CentraCom' + }, + 'nttccst010': { + 'name': 'Central Scott / CSTV' + }, + 'cha035': { + 'name': 'Chaparral CableVision' + }, + 'cha050': { + 'name': 'Chariton Valley Communication Corporation, Inc.' + }, + 'cha060': { + 'name': 'Chatmoss Cablevision' + }, + 'nttcche010': { + 'name': 'Cherokee Communications' + }, + 'che050': { + 'name': 'Chesapeake Bay Communications' + }, + 'cimtel': { + 'name': 'Cim-Tel Cable, LLC.' + }, + 'cit180': { + 'name': 'Citizens Cablevision - Floyd, VA' + }, + 'cit210': { + 'name': 'Citizens Cablevision, Inc.' + }, + 'cit040': { + 'name': 'Citizens Fiber' + }, + 'cit250': { + 'name': 'Citizens Mutual' + }, + 'war040': { + 'name': 'Citizens Telephone Corporation' + }, + 'wat025': { + 'name': 'City Of Monroe' + }, + 'wadsworth': { + 'name': 'CityLink' + }, + 'nor100': { + 'name': 'CL Tel' + }, + 'cla010': { + 'name': 'Clarence Telephone and Cedar Communications' + }, + 'ser060': { + 'name': 'Clear Choice Communications' + }, + 'tac020': { + 'name': 'Click! 
Cable TV' + }, + 'war020': { + 'name': 'CLICK1.NET' + }, + 'cml010': { + 'name': 'CML Telephone Cooperative Association' + }, + 'cns': { + 'name': 'CNS' + }, + 'com160': { + 'name': 'Co-Mo Connect' + }, + 'coa020': { + 'name': 'Coast Communications' + }, + 'coa030': { + 'name': 'Coaxial Cable TV' + }, + 'mid055': { + 'name': 'Cobalt TV (Mid-State Community TV)' + }, + 'col070': { + 'name': 'Columbia Power & Water Systems' + }, + 'col080': { + 'name': 'Columbus Telephone' + }, + 'nor105': { + 'name': 'Communications 1 Cablevision, Inc.' + }, + 'com150': { + 'name': 'Community Cable & Broadband' + }, + 'com020': { + 'name': 'Community Communications Company' + }, + 'coy010': { + 'name': 'commZoom' + }, + 'com025': { + 'name': 'Complete Communication Services' + }, + 'cat020': { + 'name': 'Comporium' + }, + 'com071': { + 'name': 'ComSouth Telesys' + }, + 'consolidatedcable': { + 'name': 'Consolidated' + }, + 'conwaycorp': { + 'name': 'Conway Corporation' + }, + 'coo050': { + 'name': 'Coon Valley Telecommunications Inc' + }, + 'coo080': { + 'name': 'Cooperative Telephone Company' + }, + 'cpt010': { + 'name': 'CP-TEL' + }, + 'cra010': { + 'name': 'Craw-Kan Telephone' + }, + 'crestview': { + 'name': 'Crestview Cable Communications' + }, + 'cross': { + 'name': 'Cross TV' + }, + 'cro030': { + 'name': 'Crosslake Communications' + }, + 'ctc040': { + 'name': 'CTC - Brainerd MN' + }, + 'phe030': { + 'name': 'CTV-Beam - East Alabama' + }, + 'cun010': { + 'name': 'Cunningham Telephone & Cable' + }, + 'dpc010': { + 'name': 'D & P Communications' + }, + 'dak030': { + 'name': 'Dakota Central Telecommunications' + }, + 'nttcdel010': { + 'name': 'Delcambre Telephone LLC' + }, + 'tel160-del': { + 'name': 'Delta Telephone Company' + }, + 'sal040': { + 'name': 'DiamondNet' + }, + 'ind060-dc': { + 'name': 'Direct Communications' + }, + 'doy010': { + 'name': 'Doylestown Cable TV' + }, + 'dic010': { + 'name': 'DRN' + }, + 'dtc020': { + 'name': 'DTC' + }, + 'dtc010': { + 'name': 'DTC Cable (Delhi)' + }, + 'dum010': { + 'name': 'Dumont Telephone Company' + }, + 'dun010': { + 'name': 'Dunkerton Telephone Cooperative' + }, + 'cci010': { + 'name': 'Duo County Telecom' + }, + 'eagle': { + 'name': 'Eagle Communications' + }, + 'weh010-east': { + 'name': 'East Arkansas Cable TV' + }, + 'eatel': { + 'name': 'EATEL Video, LLC' + }, + 'ell010': { + 'name': 'ECTA' + }, + 'emerytelcom': { + 'name': 'Emery Telcom Video LLC' + }, + 'nor200': { + 'name': 'Empire Access' + }, + 'endeavor': { + 'name': 'Endeavor Communications' + }, + 'sun045': { + 'name': 'Enhanced Telecommunications Corporation' + }, + 'mid030': { + 'name': 'enTouch' + }, + 'epb020': { + 'name': 'EPB Smartnet' + }, + 'jea010': { + 'name': 'EPlus Broadband' + }, + 'com065': { + 'name': 'ETC' + }, + 'ete010': { + 'name': 'Etex Communications' + }, + 'fbc-tele': { + 'name': 'F&B Communications' + }, + 'fal010': { + 'name': 'Falcon Broadband' + }, + 'fam010': { + 'name': 'FamilyView CableVision' + }, + 'far020': { + 'name': 'Farmers Mutual Telephone Company' + }, + 'fay010': { + 'name': 'Fayetteville Public Utilities' + }, + 'sal060': { + 'name': 'fibrant' + }, + 'fid010': { + 'name': 'Fidelity Communications' + }, + 'for030': { + 'name': 'FJ Communications' + }, + 'fli020': { + 'name': 'Flint River Communications' + }, + 'far030': { + 'name': 'FMT - Jesup' + }, + 'foo010': { + 'name': 'Foothills Communications' + }, + 'for080': { + 'name': 'Forsyth CableNet' + }, + 'fbcomm': { + 'name': 'Frankfort Plant Board' + }, + 'tel160-fra': { + 'name': 'Franklin Telephone 
Company' + }, + 'nttcftc010': { + 'name': 'FTC' + }, + 'fullchannel': { + 'name': 'Full Channel, Inc.' + }, + 'gar040': { + 'name': 'Gardonville Cooperative Telephone Association' + }, + 'gbt010': { + 'name': 'GBT Communications, Inc.' + }, + 'tec010': { + 'name': 'Genuine Telecom' + }, + 'clr010': { + 'name': 'Giant Communications' + }, + 'gla010': { + 'name': 'Glasgow EPB' + }, + 'gle010': { + 'name': 'Glenwood Telecommunications' + }, + 'gra060': { + 'name': 'GLW Broadband Inc.' + }, + 'goldenwest': { + 'name': 'Golden West Cablevision' + }, + 'vis030': { + 'name': 'Grantsburg Telcom' + }, + 'gpcom': { + 'name': 'Great Plains Communications' + }, + 'gri010': { + 'name': 'Gridley Cable Inc' + }, + 'hbc010': { + 'name': 'H&B Cable Services' + }, + 'hae010': { + 'name': 'Haefele TV Inc.' + }, + 'htc010': { + 'name': 'Halstad Telephone Company' + }, + 'har005': { + 'name': 'Harlan Municipal Utilities' + }, + 'har020': { + 'name': 'Hart Communications' + }, + 'ced010': { + 'name': 'Hartelco TV' + }, + 'hea040': { + 'name': 'Heart of Iowa Communications Cooperative' + }, + 'htc020': { + 'name': 'Hickory Telephone Company' + }, + 'nttchig010': { + 'name': 'Highland Communication Services' + }, + 'hig030': { + 'name': 'Highland Media' + }, + 'spc010': { + 'name': 'Hilliary Communications' + }, + 'hin020': { + 'name': 'Hinton CATV Co.' + }, + 'hometel': { + 'name': 'HomeTel Entertainment, Inc.' + }, + 'hoodcanal': { + 'name': 'Hood Canal Communications' + }, + 'weh010-hope': { + 'name': 'Hope - Prescott Cable TV' + }, + 'horizoncable': { + 'name': 'Horizon Cable TV, Inc.' + }, + 'hor040': { + 'name': 'Horizon Chillicothe Telephone' + }, + 'htc030': { + 'name': 'HTC Communications Co. - IL' + }, + 'htccomm': { + 'name': 'HTC Communications, Inc. - IA' + }, + 'wal005': { + 'name': 'Huxley Communications' + }, + 'imon': { + 'name': 'ImOn Communications' + }, + 'ind040': { + 'name': 'Independence Telecommunications' + }, + 'rrc010': { + 'name': 'Inland Networks' + }, + 'stc020': { + 'name': 'Innovative Cable TV St Croix' + }, + 'car100': { + 'name': 'Innovative Cable TV St Thomas-St John' + }, + 'icc010': { + 'name': 'Inside Connect Cable' + }, + 'int100': { + 'name': 'Integra Telecom' + }, + 'int050': { + 'name': 'Interstate Telecommunications Coop' + }, + 'irv010': { + 'name': 'Irvine Cable' + }, + 'k2c010': { + 'name': 'K2 Communications' + }, + 'kal010': { + 'name': 'Kalida Telephone Company, Inc.' + }, + 'kal030': { + 'name': 'Kalona Cooperative Telephone Company' + }, + 'kmt010': { + 'name': 'KMTelecom' + }, + 'kpu010': { + 'name': 'KPU Telecommunications' + }, + 'kuh010': { + 'name': 'Kuhn Communications, Inc.' + }, + 'lak130': { + 'name': 'Lakeland Communications' + }, + 'lan010': { + 'name': 'Langco' + }, + 'lau020': { + 'name': 'Laurel Highland Total Communications, Inc.' + }, + 'leh010': { + 'name': 'Lehigh Valley Cooperative Telephone' + }, + 'bra010': { + 'name': 'Limestone Cable/Bracken Cable' + }, + 'loc020': { + 'name': 'LISCO' + }, + 'lit020': { + 'name': 'Litestream' + }, + 'tel140': { + 'name': 'LivCom' + }, + 'loc010': { + 'name': 'LocalTel Communications' + }, + 'weh010-longview': { + 'name': 'Longview - Kilgore Cable TV' + }, + 'lon030': { + 'name': 'Lonsdale Video Ventures, LLC' + }, + 'lns010': { + 'name': 'Lost Nation-Elwood Telephone Co.' + }, + 'nttclpc010': { + 'name': 'LPC Connect' + }, + 'lumos': { + 'name': 'Lumos Networks' + }, + 'madison': { + 'name': 'Madison Communications' + }, + 'mad030': { + 'name': 'Madison County Cable Inc.' 
+ }, + 'nttcmah010': { + 'name': 'Mahaska Communication Group' + }, + 'mar010': { + 'name': 'Marne & Elk Horn Telephone Company' + }, + 'mcc040': { + 'name': 'McClure Telephone Co.' + }, + 'mctv': { + 'name': 'MCTV' + }, + 'merrimac': { + 'name': 'Merrimac Communications Ltd.' + }, + 'metronet': { + 'name': 'Metronet' + }, + 'mhtc': { + 'name': 'MHTC' + }, + 'midhudson': { + 'name': 'Mid-Hudson Cable' + }, + 'midrivers': { + 'name': 'Mid-Rivers Communications' + }, + 'mid045': { + 'name': 'Midstate Communications' + }, + 'mil080': { + 'name': 'Milford Communications' + }, + 'min030': { + 'name': 'MINET' + }, + 'nttcmin010': { + 'name': 'Minford TV' + }, + 'san040-02': { + 'name': 'Mitchell Telecom' + }, + 'mlg010': { + 'name': 'MLGC' + }, + 'mon060': { + 'name': 'Mon-Cre TVE' + }, + 'mou110': { + 'name': 'Mountain Telephone' + }, + 'mou050': { + 'name': 'Mountain Village Cable' + }, + 'mtacomm': { + 'name': 'MTA Communications, LLC' + }, + 'mtc010': { + 'name': 'MTC Cable' + }, + 'med040': { + 'name': 'MTC Technologies' + }, + 'man060': { + 'name': 'MTCC' + }, + 'mtc030': { + 'name': 'MTCO Communications' + }, + 'mul050': { + 'name': 'Mulberry Telecommunications' + }, + 'mur010': { + 'name': 'Murray Electric System' + }, + 'musfiber': { + 'name': 'MUS FiberNET' + }, + 'mpw': { + 'name': 'Muscatine Power & Water' + }, + 'nttcsli010': { + 'name': 'myEVTV.com' + }, + 'nor115': { + 'name': 'NCC' + }, + 'nor260': { + 'name': 'NDTC' + }, + 'nctc': { + 'name': 'Nebraska Central Telecom, Inc.' + }, + 'nel020': { + 'name': 'Nelsonville TV Cable' + }, + 'nem010': { + 'name': 'Nemont' + }, + 'new075': { + 'name': 'New Hope Telephone Cooperative' + }, + 'nor240': { + 'name': 'NICP' + }, + 'cic010': { + 'name': 'NineStar Connect' + }, + 'nktelco': { + 'name': 'NKTelco' + }, + 'nortex': { + 'name': 'Nortex Communications' + }, + 'nor140': { + 'name': 'North Central Telephone Cooperative' + }, + 'nor030': { + 'name': 'Northland Communications' + }, + 'nor075': { + 'name': 'Northwest Communications' + }, + 'nor125': { + 'name': 'Norwood Light Broadband' + }, + 'net010': { + 'name': 'Nsight Telservices' + }, + 'dur010': { + 'name': 'Ntec' + }, + 'nts010': { + 'name': 'NTS Communications' + }, + 'new045': { + 'name': 'NU-Telecom' + }, + 'nulink': { + 'name': 'NuLink' + }, + 'jam030': { + 'name': 'NVC' + }, + 'far035': { + 'name': 'OmniTel Communications' + }, + 'onesource': { + 'name': 'OneSource Communications' + }, + 'cit230': { + 'name': 'Opelika Power Services' + }, + 'daltonutilities': { + 'name': 'OptiLink' + }, + 'mid140': { + 'name': 'OPTURA' + }, + 'ote010': { + 'name': 'OTEC Communication Company' + }, + 'cci020': { + 'name': 'Packerland Broadband' + }, + 'pan010': { + 'name': 'Panora Telco/Guthrie Center Communications' + }, + 'otter': { + 'name': 'Park Region Telephone & Otter Tail Telcom' + }, + 'mid050': { + 'name': 'Partner Communications Cooperative' + }, + 'fib010': { + 'name': 'Pathway' + }, + 'paulbunyan': { + 'name': 'Paul Bunyan Communications' + }, + 'pem020': { + 'name': 'Pembroke Telephone Company' + }, + 'mck010': { + 'name': 'Peoples Rural Telephone Cooperative' + }, + 'pul010': { + 'name': 'PES Energize' + }, + 'phi010': { + 'name': 'Philippi Communications System' + }, + 'phonoscope': { + 'name': 'Phonoscope Cable' + }, + 'pin070': { + 'name': 'Pine Belt Communications, Inc.' 
+ }, + 'weh010-pine': { + 'name': 'Pine Bluff Cable TV' + }, + 'pin060': { + 'name': 'Pineland Telephone Cooperative' + }, + 'cam010': { + 'name': 'Pinpoint Communications' + }, + 'pio060': { + 'name': 'Pioneer Broadband' + }, + 'pioncomm': { + 'name': 'Pioneer Communications' + }, + 'pioneer': { + 'name': 'Pioneer DTV' + }, + 'pla020': { + 'name': 'Plant TiftNet, Inc.' + }, + 'par010': { + 'name': 'PLWC' + }, + 'pro035': { + 'name': 'PMT' + }, + 'vik011': { + 'name': 'Polar Cablevision' + }, + 'pottawatomie': { + 'name': 'Pottawatomie Telephone Co.' + }, + 'premiercomm': { + 'name': 'Premier Communications' + }, + 'psc010': { + 'name': 'PSC' + }, + 'pan020': { + 'name': 'PTCI' + }, + 'qco010': { + 'name': 'QCOL' + }, + 'qua010': { + 'name': 'Quality Cablevision' + }, + 'rad010': { + 'name': 'Radcliffe Telephone Company' + }, + 'car040': { + 'name': 'Rainbow Communications' + }, + 'rai030': { + 'name': 'Rainier Connect' + }, + 'ral010': { + 'name': 'Ralls Technologies' + }, + 'rct010': { + 'name': 'RC Technologies' + }, + 'red040': { + 'name': 'Red River Communications' + }, + 'ree010': { + 'name': 'Reedsburg Utility Commission' + }, + 'mol010': { + 'name': 'Reliance Connects- Oregon' + }, + 'res020': { + 'name': 'Reserve Telecommunications' + }, + 'weh010-resort': { + 'name': 'Resort TV Cable' + }, + 'rld010': { + 'name': 'Richland Grant Telephone Cooperative, Inc.' + }, + 'riv030': { + 'name': 'River Valley Telecommunications Coop' + }, + 'rockportcable': { + 'name': 'Rock Port Cablevision' + }, + 'rsf010': { + 'name': 'RS Fiber' + }, + 'rtc': { + 'name': 'RTC Communication Corp' + }, + 'res040': { + 'name': 'RTC-Reservation Telephone Coop.' + }, + 'rte010': { + 'name': 'RTEC Communications' + }, + 'stc010': { + 'name': 'S&T' + }, + 'san020': { + 'name': 'San Bruno Cable TV' + }, + 'san040-01': { + 'name': 'Santel' + }, + 'sav010': { + 'name': 'SCI Broadband-Savage Communications Inc.' + }, + 'sco050': { + 'name': 'Scottsboro Electric Power Board' + }, + 'scr010': { + 'name': 'Scranton Telephone Company' + }, + 'selco': { + 'name': 'SELCO' + }, + 'she010': { + 'name': 'Shentel' + }, + 'she030': { + 'name': 'Sherwood Mutual Telephone Association, Inc.' + }, + 'ind060-ssc': { + 'name': 'Silver Star Communications' + }, + 'sjoberg': { + 'name': 'Sjoberg\'s Inc.' + }, + 'sou025': { + 'name': 'SKT' + }, + 'sky050': { + 'name': 'SkyBest TV' + }, + 'nttcsmi010': { + 'name': 'Smithville Communications' + }, + 'woo010': { + 'name': 'Solarus' + }, + 'sou075': { + 'name': 'South Central Rural Telephone Cooperative' + }, + 'sou065': { + 'name': 'South Holt Cablevision, Inc.' + }, + 'sou035': { + 'name': 'South Slope Cooperative Communications' + }, + 'spa020': { + 'name': 'Spanish Fork Community Network' + }, + 'spe010': { + 'name': 'Spencer Municipal Utilities' + }, + 'spi005': { + 'name': 'Spillway Communications, Inc.' + }, + 'srt010': { + 'name': 'SRT' + }, + 'cccsmc010': { + 'name': 'St. 
Maarten Cable TV' + }, + 'sta025': { + 'name': 'Star Communications' + }, + 'sco020': { + 'name': 'STE' + }, + 'uin010': { + 'name': 'STRATA Networks' + }, + 'sum010': { + 'name': 'Sumner Cable TV' + }, + 'pie010': { + 'name': 'Surry TV/PCSI TV' + }, + 'swa010': { + 'name': 'Swayzee Communications' + }, + 'sweetwater': { + 'name': 'Sweetwater Cable Television Co' + }, + 'weh010-talequah': { + 'name': 'Tahlequah Cable TV' + }, + 'tct': { + 'name': 'TCT' + }, + 'tel050': { + 'name': 'Tele-Media Company' + }, + 'com050': { + 'name': 'The Community Agency' + }, + 'thr020': { + 'name': 'Three River' + }, + 'cab140': { + 'name': 'Town & Country Technologies' + }, + 'tra010': { + 'name': 'Trans-Video' + }, + 'tre010': { + 'name': 'Trenton TV Cable Company' + }, + 'tcc': { + 'name': 'Tri County Communications Cooperative' + }, + 'tri025': { + 'name': 'TriCounty Telecom' + }, + 'tri110': { + 'name': 'TrioTel Communications, Inc.' + }, + 'tro010': { + 'name': 'Troy Cablevision, Inc.' + }, + 'tsc': { + 'name': 'TSC' + }, + 'cit220': { + 'name': 'Tullahoma Utilities Board' + }, + 'tvc030': { + 'name': 'TV Cable of Rensselaer' + }, + 'tvc015': { + 'name': 'TVC Cable' + }, + 'cab180': { + 'name': 'TVision' + }, + 'twi040': { + 'name': 'Twin Lakes' + }, + 'tvtinc': { + 'name': 'Twin Valley' + }, + 'uis010': { + 'name': 'Union Telephone Company' + }, + 'uni110': { + 'name': 'United Communications - TN' + }, + 'uni120': { + 'name': 'United Services' + }, + 'uss020': { + 'name': 'US Sonet' + }, + 'cab060': { + 'name': 'USA Communications' + }, + 'she005': { + 'name': 'USA Communications/Shellsburg, IA' + }, + 'val040': { + 'name': 'Valley TeleCom Group' + }, + 'val025': { + 'name': 'Valley Telecommunications' + }, + 'val030': { + 'name': 'Valparaiso Broadband' + }, + 'cla050': { + 'name': 'Vast Broadband' + }, + 'sul015': { + 'name': 'Venture Communications Cooperative, Inc.' + }, + 'ver025': { + 'name': 'Vernon Communications Co-op' + }, + 'weh010-vicksburg': { + 'name': 'Vicksburg Video' + }, + 'vis070': { + 'name': 'Vision Communications' + }, + 'volcanotel': { + 'name': 'Volcano Vision, Inc.' 
+ }, + 'vol040-02': { + 'name': 'VolFirst / BLTV' + }, + 'ver070': { + 'name': 'VTel' + }, + 'nttcvtx010': { + 'name': 'VTX1' + }, + 'bci010-02': { + 'name': 'Vyve Broadband' + }, + 'wab020': { + 'name': 'Wabash Mutual Telephone' + }, + 'waitsfield': { + 'name': 'Waitsfield Cable' + }, + 'wal010': { + 'name': 'Walnut Communications' + }, + 'wavebroadband': { + 'name': 'Wave' + }, + 'wav030': { + 'name': 'Waverly Communications Utility' + }, + 'wbi010': { + 'name': 'WBI' + }, + 'web020': { + 'name': 'Webster-Calhoun Cooperative Telephone Association' + }, + 'wes005': { + 'name': 'West Alabama TV Cable' + }, + 'carolinata': { + 'name': 'West Carolina Communications' + }, + 'wct010': { + 'name': 'West Central Telephone Association' + }, + 'wes110': { + 'name': 'West River Cooperative Telephone Company' + }, + 'ani030': { + 'name': 'WesTel Systems' + }, + 'westianet': { + 'name': 'Western Iowa Networks' + }, + 'nttcwhi010': { + 'name': 'Whidbey Telecom' + }, + 'weh010-white': { + 'name': 'White County Cable TV' + }, + 'wes130': { + 'name': 'Wiatel' + }, + 'wik010': { + 'name': 'Wiktel' + }, + 'wil070': { + 'name': 'Wilkes Communications, Inc./RiverStreet Networks' + }, + 'wil015': { + 'name': 'Wilson Communications' + }, + 'win010': { + 'name': 'Windomnet/SMBS' + }, + 'win090': { + 'name': 'Windstream Cable TV' + }, + 'wcta': { + 'name': 'Winnebago Cooperative Telecom Association' + }, + 'wtc010': { + 'name': 'WTC' + }, + 'wil040': { + 'name': 'WTC Communications, Inc.' + }, + 'wya010': { + 'name': 'Wyandotte Cable' + }, + 'hin020-02': { + 'name': 'X-Stream Services' + }, + 'xit010': { + 'name': 'XIT Communications' + }, + 'yel010': { + 'name': 'Yelcot Communications' + }, + 'mid180-01': { + 'name': 'yondoo' + }, + 'cou060': { + 'name': 'Zito Media' + }, +} + + +class AdobePassIE(InfoExtractor): + _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' + _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + _MVPD_CACHE = 'ap-mvpd' + + _DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page' + + def _download_webpage_handle(self, *args, **kwargs): + headers = self.geo_verification_headers() + headers.update(kwargs.get('headers', {})) + kwargs['headers'] = headers + return super(AdobePassIE, self)._download_webpage_handle( + *args, **compat_kwargs(kwargs)) + + @staticmethod + def _get_mvpd_resource(provider_id, title, guid, rating): + channel = etree.Element('channel') + channel_title = etree.SubElement(channel, 'title') + channel_title.text = provider_id + item = etree.SubElement(channel, 'item') + resource_title = etree.SubElement(item, 'title') + resource_title.text = title + resource_guid = etree.SubElement(item, 'guid') + resource_guid.text = guid + resource_rating = etree.SubElement(item, 'media:rating') + resource_rating.attrib = {'scheme': 'urn:v-chip'} + resource_rating.text = rating + return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>' + + def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): + def xml_text(xml_str, tag): + return self._search_regex( + '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag) + + def is_expired(token, date_ele): + token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) + return token_expires and token_expires <= int(time.time()) + + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res + post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url') + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url)
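+ # post_form() replays a provider login form: a relative action URL is resolved
+ # against the page URL above, and the page's hidden inputs are merged with the
+ # caller's field data below before re-POSTing as application/x-www-form-urlencoded.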
+ form_data = self._hidden_inputs(form_page) + form_data.update(data) + return self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + + def raise_mvpd_required(): + raise ExtractorError( + 'This video is only available for users of participating TV providers. ' + 'Use --ap-mso to specify Adobe Pass Multiple-system operator Identifier ' + 'and --ap-username and --ap-password or --netrc to provide account credentials.', expected=True) + + def extract_redirect_url(html, url=None, fatal=False): + # TODO: eliminate code duplication with generic extractor and move + # redirection code into _download_webpage_handle + REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' + redirect_url = self._search_regex( + r'(?i)Resume' in mvpd_confirm_page: + post_form(mvpd_confirm_page_res, 'Confirming Login') + elif mso_id == 'Verizon': + # In general, if you're connecting from a Verizon-assigned IP, + # you will not actually pass your credentials. + provider_redirect_page, urlh = provider_redirect_page_res + if 'Please wait ...' in provider_redirect_page: + saml_redirect_url = self._html_search_regex( + r'self\.parent\.location=(["\'])(?P<url>.+?)\1', + provider_redirect_page, + 'SAML Redirect URL', group='url') + saml_login_page = self._download_webpage( + saml_redirect_url, video_id, + 'Downloading SAML Login Page') + else: + saml_login_page_res = post_form( + provider_redirect_page_res, 'Logging in', { + mso_info['username_field']: username, + mso_info['password_field']: password, + }) + saml_login_page, urlh = saml_login_page_res + if 'Please try again.' in saml_login_page: + raise ExtractorError( + 'We\'re sorry, but either the User ID or Password entered is not correct.') + saml_login_url = self._search_regex( + r'xmlHttp\.open\("POST"\s*,\s*(["\'])(?P<url>.+?)\1', + saml_login_page, 'SAML Login URL', group='url') + saml_response_json = self._download_json( + saml_login_url, video_id, 'Downloading SAML Response', + headers={'Content-Type': 'text/xml'}) + self._download_webpage( + saml_response_json['targetValue'], video_id, + 'Confirming Login', data=urlencode_postdata({ + 'SAMLResponse': saml_response_json['SAMLResponse'], + 'RelayState': saml_response_json['RelayState'] + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded' + }) + else: + # Some providers (e.g. DIRECTV NOW) have another meta refresh + # based redirect that should be followed.
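+ # Illustrative only, with a hypothetical host: a redirect page such as
+ # <meta http-equiv="refresh" content="0; URL='https://idp.example-mso.com/login'" />
+ # would yield https://idp.example-mso.com/login from extract_redirect_url() above.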
+ provider_redirect_page, urlh = provider_redirect_page_res + provider_refresh_redirect_url = extract_redirect_url( + provider_redirect_page, url=urlh.geturl()) + if provider_refresh_redirect_url: + provider_redirect_page_res = self._download_webpage_handle( + provider_refresh_redirect_url, video_id, + 'Downloading Provider Redirect Page (meta refresh)') + provider_login_page_res = post_form( + provider_redirect_page_res, self._DOWNLOADING_LOGIN_PAGE) + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { + mso_info.get('username_field', 'username'): username, + mso_info.get('password_field', 'password'): password, + }) + if mso_id != 'Rogers': + post_form(mvpd_confirm_page_res, 'Confirming Login') + + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + if 'fr|de|es|jp)/)?watch/(?P[^/]+)/(?P[^/]+)' + + _TEST = { + 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/', + 'md5': '9bc5727bcdd55251f35ad311ca74fa1e', + 'info_dict': { + 'id': '10981', + 'ext': 'mp4', + 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop', + 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311', + 'thumbnail': r're:https?://.*\.jpg$', + 'upload_date': '20110914', + 'duration': 60, + 'view_count': int, + }, + } + + def _real_extract(self, url): + language, show_urlname, urlname = re.match(self._VALID_URL, url).groups() + if not language: + language = 'en' + + video_data = self._download_json( + self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname), + urlname)['data'][0] + + formats = [{ + 'url': source['url'], + 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None, + 'width': int_or_none(source.get('width')), + 'height': int_or_none(source.get('height')), + 'tbr': int_or_none(source.get('video_data_rate')), + } for source in video_data['videos']] + self._sort_formats(formats) + + return { + 'id': compat_str(video_data['id']), + 'title': video_data['title'], + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnail'), + 'upload_date': unified_strdate(video_data.get('start_date')), + 'duration': parse_duration(video_data.get('duration')), + 'view_count': str_to_int(video_data.get('playcount')), + 'formats': formats, + } + + +class AdobeTVPlaylistBaseIE(AdobeTVBaseIE): + def _parse_page_data(self, page_data): + return [self.url_result(self._get_element_url(element_data)) for element_data in page_data] + + def _extract_playlist_entries(self, url, display_id): + page = self._download_json(url, display_id) + entries = self._parse_page_data(page['data']) + for page_num in range(2, page['paging']['pages'] + 1): + entries.extend(self._parse_page_data( + self._download_json(url + '&page=%d' % page_num, display_id)['data'])) + return entries + + +class AdobeTVShowIE(AdobeTVPlaylistBaseIE): + _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?show/(?P[^/]+)' + + _TEST = { + 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost', + 'info_dict': { + 'id': '36', + 'title': 'The Complete Picture with Julieanne Kost', + 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27', + }, + 'playlist_mincount': 136, + } + + def _get_element_url(self, element_data): + 
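+ # Page elements returned by the tv.adobe.com JSON API carry their permalinks in
+ # an 'urls' array; the first entry is taken as the watch URL handed to
+ # url_result() (AdobeTVChannelIE below exposes a single 'url' field instead).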
return element_data['urls'][0] + + def _real_extract(self, url): + language, show_urlname = re.match(self._VALID_URL, url).groups() + if not language: + language = 'en' + query = 'language=%s&show_urlname=%s' % (language, show_urlname) + + show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0] + + return self.playlist_result( + self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname), + compat_str(show_data['id']), + show_data['show_name'], + show_data['show_description']) + + +class AdobeTVChannelIE(AdobeTVPlaylistBaseIE): + _VALID_URL = r'https?://tv\.adobe\.com/(?:(?Pfr|de|es|jp)/)?channel/(?P[^/]+)(?:/(?P[^/]+))?' + + _TEST = { + 'url': 'http://tv.adobe.com/channel/development', + 'info_dict': { + 'id': 'development', + }, + 'playlist_mincount': 96, + } + + def _get_element_url(self, element_data): + return element_data['url'] + + def _real_extract(self, url): + language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups() + if not language: + language = 'en' + query = 'language=%s&channel_urlname=%s' % (language, channel_urlname) + if category_urlname: + query += '&category_urlname=%s' % category_urlname + + return self.playlist_result( + self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname), + channel_urlname) + + +class AdobeTVVideoIE(InfoExtractor): + _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P\d+)' + + _TEST = { + # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners + 'url': 'https://video.tv.adobe.com/v/2456/', + 'md5': '43662b577c018ad707a63766462b1e87', + 'info_dict': { + 'id': '2456', + 'ext': 'mp4', + 'title': 'New experience with Acrobat DC', + 'description': 'New experience with Acrobat DC', + 'duration': 248.667, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_data = self._parse_json(self._search_regex( + r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) + + formats = [{ + 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')), + 'url': source['src'], + 'width': int_or_none(source.get('width')), + 'height': int_or_none(source.get('height')), + 'tbr': int_or_none(source.get('bitrate')), + } for source in video_data['sources']] + self._sort_formats(formats) + + # For both metadata and downloaded files the duration varies among + # formats. 
I just pick the max one + duration = max(filter(None, [ + float_or_none(source.get('duration'), scale=1000) + for source in video_data['sources']])) + + subtitles = {} + for translation in video_data.get('translations', []): + lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium']) + if lang_id not in subtitles: + subtitles[lang_id] = [] + subtitles[lang_id].append({ + 'url': translation['vttPath'], + 'ext': 'vtt', + }) + + return { + 'id': video_id, + 'formats': formats, + 'title': video_data['title'], + 'description': video_data.get('description'), + 'thumbnail': video_data['video'].get('poster'), + 'duration': duration, + 'subtitles': subtitles, + } diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py new file mode 100644 index 000000000..8d1d9ac7d --- /dev/null +++ b/youtube_dl/extractor/adultswim.py @@ -0,0 +1,202 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .turner import TurnerBaseIE +from ..utils import ( + determine_ext, + float_or_none, + int_or_none, + mimetype2ext, + parse_age_limit, + parse_iso8601, + strip_or_none, + try_get, +) + + +class AdultSwimIE(TurnerBaseIE): + _VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P[^/?#]+)(?:/(?P[^/?#]+))?' + + _TESTS = [{ + 'url': 'http://adultswim.com/videos/rick-and-morty/pilot', + 'info_dict': { + 'id': 'rQxZvXQ4ROaSOqq-or2Mow', + 'ext': 'mp4', + 'title': 'Rick and Morty - Pilot', + 'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.', + 'timestamp': 1543294800, + 'upload_date': '20181127', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'], + }, { + 'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/', + 'info_dict': { + 'id': 'sY3cMUR_TbuE4YmdjzbIcQ', + 'ext': 'mp4', + 'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine', + 'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.', + 'upload_date': '20080124', + 'timestamp': 1201150800, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': '404 Not Found', + }, { + 'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/', + 'info_dict': { + 'id': 'I0LQFQkaSUaFp8PnAWHhoQ', + 'ext': 'mp4', + 'title': 'Decker - Inside Decker: A New Hero', + 'description': 'The guys recap the conclusion of the season. 
They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.', + 'timestamp': 1469480460, + 'upload_date': '20160725', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest'], + }, { + 'url': 'http://www.adultswim.com/videos/attack-on-titan', + 'info_dict': { + 'id': 'attack-on-titan', + 'title': 'Attack on Titan', + 'description': 'md5:41caa9416906d90711e31dc00cb7db7e', + }, + 'playlist_mincount': 12, + }, { + 'url': 'http://www.adultswim.com/videos/streams/williams-stream', + 'info_dict': { + 'id': 'd8DEBj7QRfetLsRgFnGEyg', + 'ext': 'mp4', + 'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'description': 'original programming', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': '404 Not Found', + }] + + def _real_extract(self, url): + show_path, episode_path = re.match(self._VALID_URL, url).groups() + display_id = episode_path or show_path + query = '''query { + getShowBySlug(slug:"%s") { + %%s + } +}''' % show_path + if episode_path: + query = query % '''title + getVideoBySlug(slug:"%s") { + _id + auth + description + duration + episodeNumber + launchDate + mediaID + seasonNumber + poster + title + tvRating + }''' % episode_path + else: + query = query % '''metaDescription + title + videos(first:1000,sort:["episode_number"]) { + edges { + node { + _id + slug + } + } + }''' + show_data = self._download_json( + 'https://www.adultswim.com/api/search', display_id, + data=json.dumps({'query': query}).encode(), + headers={'Content-Type': 'application/json'})['data']['getShowBySlug'] + if episode_path: + video_data = show_data['getVideoBySlug'] + video_id = video_data['_id'] + episode_title = title = video_data['title'] + series = show_data.get('title') + if series: + title = '%s - %s' % (series, title) + info = { + 'id': video_id, + 'title': title, + 'description': strip_or_none(video_data.get('description')), + 'duration': float_or_none(video_data.get('duration')), + 'formats': [], + 'subtitles': {}, + 'age_limit': parse_age_limit(video_data.get('tvRating')), + 'thumbnail': video_data.get('poster'), + 'timestamp': parse_iso8601(video_data.get('launchDate')), + 'series': series, + 'season_number': int_or_none(video_data.get('seasonNumber')), + 'episode': episode_title, + 'episode_number': int_or_none(video_data.get('episodeNumber')), + } + + auth = video_data.get('auth') + media_id = video_data.get('mediaID') + if media_id: + info.update(self._extract_ngtv_info(media_id, { + # CDN_TOKEN_APP_ID from: + # https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js + 'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE', + }, { + 'url': url, + 'site_name': 'AdultSwim', + 'auth_required': auth, + })) + + if not auth: + extract_data = self._download_json( + 'https://www.adultswim.com/api/shows/v1/videos/' + video_id, + video_id, query={'fields': 'stream'}, fatal=False) or {} + assets = try_get(extract_data, lambda x: x['data']['video']['stream']['assets'], list) or [] + for asset in assets: + asset_url = asset.get('url') + if not asset_url: + continue + ext = determine_ext(asset_url, mimetype2ext(asset.get('mime_type'))) + if ext == 'm3u8': + info['formats'].extend(self._extract_m3u8_formats( + asset_url, video_id, 'mp4',
m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + continue + # info['formats'].extend(self._extract_f4m_formats( + # asset_url, video_id, f4m_id='hds', fatal=False)) + elif ext in ('scc', 'ttml', 'vtt'): + info['subtitles'].setdefault('en', []).append({ + 'url': asset_url, + }) + self._sort_formats(info['formats']) + + return info + else: + entries = [] + for edge in show_data.get('videos', {}).get('edges', []): + video = edge.get('node') or {} + slug = video.get('slug') + if not slug: + continue + entries.append(self.url_result( + 'http://adultswim.com/videos/%s/%s' % (show_path, slug), + 'AdultSwim', video.get('_id'))) + return self.playlist_result( + entries, show_path, show_data.get('title'), + strip_or_none(show_data.get('metaDescription'))) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py new file mode 100644 index 000000000..611b948f5 --- /dev/null +++ b/youtube_dl/extractor/aenetworks.py @@ -0,0 +1,247 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .theplatform import ThePlatformIE +from ..utils import ( + extract_attributes, + ExtractorError, + int_or_none, + smuggle_url, + update_url_query, +) +from ..compat import ( + compat_urlparse, +) + + +class AENetworksBaseIE(ThePlatformIE): + _THEPLATFORM_KEY = 'crazyjava' + _THEPLATFORM_SECRET = 's3cr3t' + + def _extract_aen_smil(self, smil_url, video_id, auth=None): + query = {'mbr': 'true'} + if auth: + query['auth'] = auth + TP_SMIL_QUERY = [{ + 'assetTypes': 'high_video_ak', + 'switch': 'hls_high_ak' + }, { + 'assetTypes': 'high_video_s3' + }, { + 'assetTypes': 'high_video_s3', + 'switch': 'hls_ingest_fastly' + }] + formats = [] + subtitles = {} + last_e = None + for q in TP_SMIL_QUERY: + q.update(query) + m_url = update_url_query(smil_url, q) + m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET) + try: + tp_formats, tp_subtitles = self._extract_theplatform_smil( + m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes'])) + except ExtractorError as e: + last_e = e + continue + formats.extend(tp_formats) + subtitles = self._merge_subtitles(subtitles, tp_subtitles) + if last_e and not formats: + raise last_e + self._sort_formats(formats) + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + } + + +class AENetworksIE(AENetworksBaseIE): + IE_NAME = 'aenetworks' + IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? 
+ (?P<domain> + (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com| + fyi\.tv + )/ + (?: + shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})| + movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?| + specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)| + collections/[^/]+/(?P<collection_display_id>[^/]+) + ) + ''' + _TESTS = [{ + 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', + 'info_dict': { + 'id': '22253814', + 'ext': 'mp4', + 'title': 'Winter is Coming', + 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', + 'timestamp': 1338306241, + 'upload_date': '20120529', + 'uploader': 'AENE-NEW', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'http://www.history.com/shows/ancient-aliens/season-1', + 'info_dict': { + 'id': '71889446852', + }, + 'playlist_mincount': 5, + }, { + 'url': 'http://www.mylifetime.com/shows/atlanta-plastic', + 'info_dict': { + 'id': 'SERIES4317', + 'title': 'Atlanta Plastic', + }, + 'playlist_mincount': 2, + }, { + 'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', + 'only_matching': True + }, { + 'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', + 'only_matching': True + }, { + 'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', + 'only_matching': True + }, { + 'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', + 'only_matching': True + }, { + 'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us', + 'only_matching': True + }, { + 'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special', + 'only_matching': True + }, { + 'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward', + 'only_matching': True + }, { + 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story', + 'only_matching': True + }] + _DOMAIN_TO_REQUESTOR_ID = { + 'history.com': 'HISTORY', + 'aetv.com': 'AETV', + 'mylifetime.com': 'LIFETIME', + 'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB', + 'fyi.tv': 'FYI', + } + + def _real_extract(self, url): + domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups() + display_id = show_path or movie_display_id or special_display_id or collection_display_id + webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers()) + if show_path: + url_parts = show_path.split('/') + url_parts_len = len(url_parts) + if url_parts_len == 1: + entries = [] + for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): + entries.append(self.url_result( + compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) + if entries: + return self.playlist_result( + entries, self._html_search_meta('aetn:SeriesId', webpage), + self._html_search_meta('aetn:SeriesTitle', webpage)) + else: + # single season + url_parts_len = 2 + if url_parts_len == 2: + entries = [] + for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage): + episode_attributes = extract_attributes(episode_item) + episode_url = compat_urlparse.urljoin( + url, episode_attributes['data-canonical']) + entries.append(self.url_result( + episode_url, 'AENetworks', + episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id'))) + return self.playlist_result( + entries, self._html_search_meta('aetn:SeasonId', webpage)) + + video_id = self._html_search_meta('aetn:VideoID', webpage) +
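A note on the `_extract_aen_smil` helper defined at the top of this file: it does not stop at the first failing ThePlatform request. It signs and fetches one SMIL variant per assetTypes/switch combination, remembers the last `ExtractorError`, and re-raises it only when no variant produced any formats. A minimal, self-contained sketch of that pattern follows; `QUERY_VARIANTS` and `fetch_formats` are illustrative stand-ins, not youtube-dl API:

    QUERY_VARIANTS = [
        {'assetTypes': 'high_video_ak', 'switch': 'hls_high_ak'},
        {'assetTypes': 'high_video_s3'},
        {'assetTypes': 'high_video_s3', 'switch': 'hls_ingest_fastly'},
    ]

    def collect_formats(fetch_formats, base_query):
        # Try every variant; remember the last failure instead of aborting.
        formats, last_exc = [], None
        for variant in QUERY_VARIANTS:
            query = dict(variant)
            query.update(base_query)  # shared params (mbr, auth) applied on top
            try:
                formats.extend(fetch_formats(query))
            except Exception as exc:  # narrowed to ExtractorError in the extractor
                last_exc = exc
        if last_exc and not formats:
            raise last_exc  # surface the error only if nothing worked
        return formats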
media_url = self._search_regex( + [r"media_url\s*=\s*'(?P[^']+)'", + r'data-media-url=(?P(?:https?:)?//[^\s>]+)', + r'data-media-url=(["\'])(?P(?:(?!\1).)+?)\1'], + webpage, 'video url', group='url') + theplatform_metadata = self._download_theplatform_metadata(self._search_regex( + r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) + info = self._parse_theplatform_metadata(theplatform_metadata) + auth = None + if theplatform_metadata.get('AETN$isBehindWall'): + requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] + resource = self._get_mvpd_resource( + requestor_id, theplatform_metadata['title'], + theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), + theplatform_metadata['ratings'][0]['rating']) + auth = self._extract_mvpd_auth( + url, video_id, requestor_id, resource) + info.update(self._search_json_ld(webpage, video_id, fatal=False)) + info.update(self._extract_aen_smil(media_url, video_id, auth)) + return info + + +class HistoryTopicIE(AENetworksBaseIE): + IE_NAME = 'history:topic' + IE_DESC = 'History.com Topic' + _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P[\w+-]+?)-video' + _TESTS = [{ + 'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video', + 'info_dict': { + 'id': '40700995724', + 'ext': 'mp4', + 'title': "History of Valentine’s Day", + 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', + 'timestamp': 1375819729, + 'upload_date': '20130806', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': ['ThePlatform'], + }] + + def theplatform_url_result(self, theplatform_url, video_id, query): + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': smuggle_url( + update_url_query(theplatform_url, query), + { + 'sig': { + 'key': self._THEPLATFORM_KEY, + 'secret': self._THEPLATFORM_SECRET, + }, + 'force_smil_url': True + }), + 'ie_key': 'ThePlatform', + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r']+src="[^"]+\btpid=(\d+)', webpage, 'tpid') + result = self._download_json( + 'https://feeds.video.aetnd.com/api/v2/history/videos', + video_id, query={'filter[id]': video_id})['results'][0] + title = result['title'] + info = self._extract_aen_smil(result['publicUrl'], video_id) + info.update({ + 'title': title, + 'description': result.get('description'), + 'duration': int_or_none(result.get('duration')), + 'timestamp': int_or_none(result.get('added'), 1000), + }) + return info diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dl/extractor/afreecatv.py new file mode 100644 index 000000000..6275e5209 --- /dev/null +++ b/youtube_dl/extractor/afreecatv.py @@ -0,0 +1,367 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_xpath +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + url_or_none, + urlencode_postdata, + xpath_text, +) + + +class AfreecaTVIE(InfoExtractor): + IE_NAME = 'afreecatv' + IE_DESC = 'afreecatv.com' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:(?:live|afbbs|www)\.)?afreeca(?:tv)?\.com(?::\d+)? 
+ (?: + /app/(?:index|read_ucc_bbs)\.cgi| + /player/[Pp]layer\.(?:swf|html) + )\?.*?\bnTitleNo=| + vod\.afreecatv\.com/PLAYER/STATION/ + ) + (?P\d+) + ''' + _NETRC_MACHINE = 'afreecatv' + _TESTS = [{ + 'url': 'http://live.afreecatv.com:8079/app/index.cgi?szType=read_ucc_bbs&szBjId=dailyapril&nStationNo=16711924&nBbsNo=18605867&nTitleNo=36164052&szSkin=', + 'md5': 'f72c89fe7ecc14c1b5ce506c4996046e', + 'info_dict': { + 'id': '36164052', + 'ext': 'mp4', + 'title': '데일리 에이프릴 요정들의 시상식!', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + 'upload_date': '20160503', + }, + 'skip': 'Video is gone', + }, { + 'url': 'http://afbbs.afreecatv.com:8080/app/read_ucc_bbs.cgi?nStationNo=16711924&nTitleNo=36153164&szBjId=dailyapril&nBbsNo=18605867', + 'info_dict': { + 'id': '36153164', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + }, + 'playlist_count': 2, + 'playlist': [{ + 'md5': 'd8b7c174568da61d774ef0203159bf97', + 'info_dict': { + 'id': '36153164_1', + 'ext': 'mp4', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", + 'upload_date': '20160502', + }, + }, { + 'md5': '58f2ce7f6044e34439ab2d50612ab02b', + 'info_dict': { + 'id': '36153164_2', + 'ext': 'mp4', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", + 'upload_date': '20160502', + }, + }], + 'skip': 'Video is gone', + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/18650793', + 'info_dict': { + 'id': '18650793', + 'ext': 'mp4', + 'title': '오늘은 다르다! 쏘님의 우월한 위아래~ 댄스리액션!', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': '윈아디', + 'uploader_id': 'badkids', + 'duration': 107, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/10481652', + 'info_dict': { + 'id': '10481652', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!'", + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + 'duration': 6492, + }, + 'playlist_count': 2, + 'playlist': [{ + 'md5': 'd8b7c174568da61d774ef0203159bf97', + 'info_dict': { + 'id': '20160502_c4c62b9d_174361386_1', + 'ext': 'mp4', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' (part 1)", + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + 'upload_date': '20160502', + 'duration': 3601, + }, + }, { + 'md5': '58f2ce7f6044e34439ab2d50612ab02b', + 'info_dict': { + 'id': '20160502_39e739bb_174361386_2', + 'ext': 'mp4', + 'title': "BJ유트루와 함께하는 '팅커벨 메이크업!' 
(part 2)", + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': 'dailyapril', + 'uploader_id': 'dailyapril', + 'upload_date': '20160502', + 'duration': 2891, + }, + }], + 'params': { + 'skip_download': True, + }, + }, { + # non standard key + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/20515605', + 'info_dict': { + 'id': '20170411_BE689A0E_190960999_1_2_h', + 'ext': 'mp4', + 'title': '혼자사는여자집', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': '♥이슬이', + 'uploader_id': 'dasl8121', + 'upload_date': '20170411', + 'duration': 213, + }, + 'params': { + 'skip_download': True, + }, + }, { + # PARTIAL_ADULT + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/32028439', + 'info_dict': { + 'id': '20180327_27901457_202289533_1', + 'ext': 'mp4', + 'title': '[생]빨개요♥ (part 1)', + 'thumbnail': 're:^https?://(?:video|st)img.afreecatv.com/.*$', + 'uploader': '[SA]서아', + 'uploader_id': 'bjdyrksu', + 'upload_date': '20180327', + 'duration': 3601, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['adult content'], + }, { + 'url': 'http://www.afreecatv.com/player/Player.swf?szType=szBjId=djleegoon&nStationNo=11273158&nBbsNo=13161095&nTitleNo=36327652', + 'only_matching': True, + }, { + 'url': 'http://vod.afreecatv.com/PLAYER/STATION/15055030', + 'only_matching': True, + }] + + @staticmethod + def parse_video_key(key): + video_key = {} + m = re.match(r'^(?P\d{8})_\w+_(?P\d+)$', key) + if m: + video_key['upload_date'] = m.group('upload_date') + video_key['part'] = int(m.group('part')) + return video_key + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_form = { + 'szWork': 'login', + 'szType': 'json', + 'szUid': username, + 'szPassword': password, + 'isSaveId': 'false', + 'szScriptVar': 'oLoginRet', + 'szAction': '', + } + + response = self._download_json( + 'https://login.afreecatv.com/app/LoginAction.php', None, + 'Logging in', data=urlencode_postdata(login_form)) + + _ERRORS = { + -4: 'Your account has been suspended due to a violation of our terms and policies.', + -5: 'https://member.afreecatv.com/app/user_delete_progress.php', + -6: 'https://login.afreecatv.com/membership/changeMember.php', + -8: "Hello! AfreecaTV here.\nThe username you have entered belongs to \n an account that requires a legal guardian's consent. \nIf you wish to use our services without restriction, \nplease make sure to go through the necessary verification process.", + -9: 'https://member.afreecatv.com/app/pop_login_block.php', + -11: 'https://login.afreecatv.com/afreeca/second_login.php', + -12: 'https://member.afreecatv.com/app/user_security.php', + 0: 'The username does not exist or you have entered the wrong password.', + -1: 'The username does not exist or you have entered the wrong password.', + -3: 'You have entered your username/password incorrectly.', + -7: 'You cannot use your Global AfreecaTV account to access Korean AfreecaTV.', + -10: 'Sorry for the inconvenience. \nYour account has been blocked due to an unauthorized access. \nPlease contact our Help Center for assistance.', + -32008: 'You have failed to log in. 
Please contact our Help Center.', + } + + result = int_or_none(response.get('RESULT')) + if result != 1: + error = _ERRORS.get(result, 'You have failed to log in.') + raise ExtractorError( + 'Unable to login: %s said: %s' % (self.IE_NAME, error), + expected=True) + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + if re.search(r'alert\(["\']This video has been deleted', webpage): + raise ExtractorError( + 'Video %s has been deleted' % video_id, expected=True) + + station_id = self._search_regex( + r'nStationNo\s*=\s*(\d+)', webpage, 'station') + bbs_id = self._search_regex( + r'nBbsNo\s*=\s*(\d+)', webpage, 'bbs') + video_id = self._search_regex( + r'nTitleNo\s*=\s*(\d+)', webpage, 'title', default=video_id) + + partial_view = False + for _ in range(2): + query = { + 'nTitleNo': video_id, + 'nStationNo': station_id, + 'nBbsNo': bbs_id, + } + if partial_view: + query['partialView'] = 'SKIP_ADULT' + video_xml = self._download_xml( + 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php', + video_id, 'Downloading video info XML%s' + % (' (skipping adult)' if partial_view else ''), + video_id, headers={ + 'Referer': url, + }, query=query) + + flag = xpath_text(video_xml, './track/flag', 'flag', default=None) + if flag and flag == 'SUCCEED': + break + if flag == 'PARTIAL_ADULT': + self._downloader.report_warning( + 'In accordance with local laws and regulations, underage users are restricted from watching adult content. ' + 'Only content suitable for all ages will be downloaded. ' + 'Provide account credentials if you wish to download restricted content.') + partial_view = True + continue + elif flag == 'ADULT': + error = 'Only users older than 19 are able to watch this video. Provide account credentials to download this content.' 
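+ # Note on the loop above: get_video_info.php is requested at most twice.
+ # A PARTIAL_ADULT flag triggers a single retry with partialView=SKIP_ADULT,
+ # which yields only the all-ages portion of the video; SUCCEED breaks out,
+ # while ADULT and unknown flags fall through to the raise below, and the
+ # for/else raises 'Unable to download video info' if both passes fail.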
+ else: + error = flag + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) + else: + raise ExtractorError('Unable to download video info') + + video_element = video_xml.findall(compat_xpath('./track/video'))[-1] + if video_element is None or video_element.text is None: + raise ExtractorError( + 'Video %s video does not exist' % video_id, expected=True) + + video_url = video_element.text.strip() + + title = xpath_text(video_xml, './track/title', 'title', fatal=True) + + uploader = xpath_text(video_xml, './track/nickname', 'uploader') + uploader_id = xpath_text(video_xml, './track/bj_id', 'uploader id') + duration = int_or_none(xpath_text( + video_xml, './track/duration', 'duration')) + thumbnail = xpath_text(video_xml, './track/titleImage', 'thumbnail') + + common_entry = { + 'uploader': uploader, + 'uploader_id': uploader_id, + 'thumbnail': thumbnail, + } + + info = common_entry.copy() + info.update({ + 'id': video_id, + 'title': title, + 'duration': duration, + }) + + if not video_url: + entries = [] + file_elements = video_element.findall(compat_xpath('./file')) + one = len(file_elements) == 1 + for file_num, file_element in enumerate(file_elements, start=1): + file_url = url_or_none(file_element.text) + if not file_url: + continue + key = file_element.get('key', '') + upload_date = self._search_regex( + r'^(\d{8})_', key, 'upload date', default=None) + file_duration = int_or_none(file_element.get('duration')) + format_id = key if key else '%s_%s' % (video_id, file_num) + if determine_ext(file_url) == 'm3u8': + formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', + note='Downloading part %d m3u8 information' % file_num) + else: + formats = [{ + 'url': file_url, + 'format_id': 'http', + }] + if not formats: + continue + self._sort_formats(formats) + file_info = common_entry.copy() + file_info.update({ + 'id': format_id, + 'title': title if one else '%s (part %d)' % (title, file_num), + 'upload_date': upload_date, + 'duration': file_duration, + 'formats': formats, + }) + entries.append(file_info) + entries_info = info.copy() + entries_info.update({ + '_type': 'multi_video', + 'entries': entries, + }) + return entries_info + + info = { + 'id': video_id, + 'title': title, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'duration': duration, + 'thumbnail': thumbnail, + } + + if determine_ext(video_url) == 'm3u8': + info['formats'] = self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + else: + app, playpath = video_url.split('mp4:') + info.update({ + 'url': app, + 'ext': 'flv', + 'play_path': 'mp4:' + playpath, + 'rtmp_live': True, # downloading won't end without this + }) + + return info diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dl/extractor/airmozilla.py new file mode 100644 index 000000000..9e38136b4 --- /dev/null +++ b/youtube_dl/extractor/airmozilla.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, +) + + +class AirMozillaIE(InfoExtractor): + _VALID_URL = r'https?://air\.mozilla\.org/(?P[0-9a-z-]+)/?' 
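A worked example of the RTMP branch at the end of AfreecaTVIE above, which splits the stream URL on the literal 'mp4:' marker; the URL here is made up, real ones come from the get_video_info XML:

    video_url = 'rtmp://stream.example.com/vod/mp4:20160502/movie_1.mp4'
    app, playpath = video_url.split('mp4:')
    # app      -> 'rtmp://stream.example.com/vod/'  (passed as 'url')
    # playpath -> '20160502/movie_1.mp4'            ('play_path' = 'mp4:' + playpath)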
+ _TEST = { + 'url': 'https://air.mozilla.org/privacy-lab-a-meetup-for-privacy-minded-people-in-san-francisco/', + 'md5': '8d02f53ee39cf006009180e21df1f3ba', + 'info_dict': { + 'id': '6x4q2w', + 'ext': 'mp4', + 'title': 'Privacy Lab - a meetup for privacy minded people in San Francisco', + 'thumbnail': r're:https?://.*/poster\.jpg', + 'description': 'Brings together privacy professionals and others interested in privacy at for-profits, non-profits, and NGOs in an effort to contribute to the state of the ecosystem...', + 'timestamp': 1422487800, + 'upload_date': '20150128', + 'location': 'SFO Commons', + 'duration': 3780, + 'view_count': int, + 'categories': ['Main', 'Privacy'], + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._html_search_regex(r'//vid\.ly/(.*?)/embed', webpage, 'id') + + embed_script = self._download_webpage('https://vid.ly/{0}/embed'.format(video_id), video_id) + jwconfig = self._parse_json(self._search_regex( + r'initCallback\((.*)\);', embed_script, 'metadata'), video_id)['config'] + + info_dict = self._parse_jwplayer_data(jwconfig, video_id) + view_count = int_or_none(self._html_search_regex( + r'Views since archived: ([0-9]+)', + webpage, 'view count', fatal=False)) + timestamp = parse_iso8601(self._html_search_regex( + r'<time datetime="(.*?)"', webpage, 'timestamp', fatal=False)) + duration = parse_duration(self._search_regex( + r'Duration:\s*(\d+\s*hours?\s*\d+\s*minutes?)', + webpage, 'duration', fatal=False)) + + info_dict.update({ + 'id': video_id, + 'title': self._og_search_title(webpage), + 'url': self._og_search_url(webpage), + 'display_id': display_id, + 'thumbnail': self._search_regex( + r'poster="(.*?)"', webpage, 'thumbnail', fatal=False), + 'description': self._og_search_description(webpage), + 'timestamp': timestamp, + 'location': self._html_search_regex(r'Location: (.*)', webpage, 'location', default=None), + 'duration': duration, + 'view_count': view_count, + 'categories': re.findall(r'<a href=".*?" class="channel">(.*?)</a>', webpage), + }) + + return info_dict diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dl/extractor/aliexpress.py new file mode 100644 index 000000000..6f241e683 --- /dev/null +++ b/youtube_dl/extractor/aliexpress.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + float_or_none, + try_get, +) + + +class AliExpressLiveIE(InfoExtractor): + _VALID_URL = r'https?://live\.aliexpress\.com/live/(?P<id>\d+)' + _TEST = { + 'url': 'https://live.aliexpress.com/live/2800002704436634', + 'md5': 'e729e25d47c5e557f2630eaf99b740a5', + 'info_dict': { + 'id': '2800002704436634', + 'ext': 'mp4', + 'title': 'CASIMA7.22', + 'thumbnail': r're:http://.*\.jpg', + 'uploader': 'CASIMA Official Store', + 'timestamp': 1500717600, + 'upload_date': '20170722', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + data = self._parse_json( + self._search_regex( + r'(?s)runParams\s*=\s*({.+?})\s*;?\s*var', + webpage, 'runParams'), + video_id) + + title = data['title'] + + formats = self._extract_m3u8_formats( + data['replyStreamUrl'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + + return { + 'id': video_id, + 'title': title, + 'thumbnail': data.get('coverUrl'), + 'uploader': try_get( + data, lambda x: x['followBar']['name'], compat_str), + 'timestamp': float_or_none(data.get('startTimeLong'), scale=1000), + 'formats': formats, + } diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py new file mode 100644 index 000000000..c68be3134 --- /dev/null +++ b/youtube_dl/extractor/aljazeera.py @@ -0,0 +1,33 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class AlJazeeraIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html' + + _TESTS = [{ + 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', + 'info_dict': { + 'id': '3792260579001', + 'ext': 'mp4', + 'title': 'The Slum - Episode 1: Deliverance', +
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.', + 'uploader_id': '665003303001', + 'timestamp': 1411116829, + 'upload_date': '20140919', + }, + 'add_ie': ['BrightcoveNew'], + 'skip': 'Not accessible from Travis CI server', + }, { + 'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + program_name = self._match_id(url) + webpage = self._download_webpage(url, program_name) + brightcove_id = self._search_regex( + r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id') + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/allocine.py b/youtube_dl/extractor/allocine.py new file mode 100644 index 000000000..cd533acfc --- /dev/null +++ b/youtube_dl/extractor/allocine.py @@ -0,0 +1,132 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + qualities, + remove_end, + try_get, + unified_timestamp, + url_basename, +) + + +class AllocineIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?:article|video|film)/(?:fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=|video-)(?P[0-9]+)(?:\.html)?' + + _TESTS = [{ + 'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', + 'md5': '0c9fcf59a841f65635fa300ac43d8269', + 'info_dict': { + 'id': '19546517', + 'display_id': '18635087', + 'ext': 'mp4', + 'title': 'Astérix - Le Domaine des Dieux Teaser VF', + 'description': 'md5:4a754271d9c6f16c72629a8a993ee884', + 'thumbnail': r're:http://.*\.jpg', + 'duration': 39, + 'timestamp': 1404273600, + 'upload_date': '20140702', + 'view_count': int, + }, + }, { + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', + 'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', + 'info_dict': { + 'id': '19540403', + 'display_id': '19540403', + 'ext': 'mp4', + 'title': 'Planes 2 Bande-annonce VF', + 'description': 'Regardez la bande annonce du film Planes 2 (Planes 2 Bande-annonce VF). 
Planes 2, un film de Roberts Gannaway', + 'thumbnail': r're:http://.*\.jpg', + 'duration': 69, + 'timestamp': 1385659800, + 'upload_date': '20131128', + 'view_count': int, + }, + }, { + 'url': 'http://www.allocine.fr/video/player_gen_cmedia=19544709&cfilm=181290.html', + 'md5': '101250fb127ef9ca3d73186ff22a47ce', + 'info_dict': { + 'id': '19544709', + 'display_id': '19544709', + 'ext': 'mp4', + 'title': 'Dragons 2 - Bande annonce finale VF', + 'description': 'md5:6cdd2d7c2687d4c6aafe80a35e17267a', + 'thumbnail': r're:http://.*\.jpg', + 'duration': 144, + 'timestamp': 1397589900, + 'upload_date': '20140415', + 'view_count': int, + }, + }, { + 'url': 'http://www.allocine.fr/video/video-19550147/', + 'md5': '3566c0668c0235e2d224fd8edb389f67', + 'info_dict': { + 'id': '19550147', + 'ext': 'mp4', + 'title': 'Faux Raccord N°123 - Les gaffes de Cliffhanger', + 'description': 'md5:bc734b83ffa2d8a12188d9eb48bb6354', + 'thumbnail': r're:http://.*\.jpg', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + formats = [] + quality = qualities(['ld', 'md', 'hd']) + + model = self._html_search_regex( + r'data-model="([^"]+)"', webpage, 'data model', default=None) + if model: + model_data = self._parse_json(model, display_id) + video = model_data['videos'][0] + title = video['title'] + for video_url in video['sources'].values(): + video_id, format_id = url_basename(video_url).split('_')[:2] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': video_url, + }) + duration = int_or_none(video.get('duration')) + view_count = int_or_none(video.get('view_count')) + timestamp = unified_timestamp(try_get( + video, lambda x: x['added_at']['date'], compat_str)) + else: + video_id = display_id + media_data = self._download_json( + 'http://www.allocine.fr/ws/AcVisiondataV5.ashx?media=%s' % video_id, display_id) + title = remove_end( + self._html_search_regex( + r'(?s)(.+?)', webpage, 'title').strip(), + ' - AlloCiné') + for key, value in media_data['video'].items(): + if not key.endswith('Path'): + continue + format_id = key[:-len('Path')] + formats.append({ + 'format_id': format_id, + 'quality': quality(format_id), + 'url': value, + }) + duration, view_count, timestamp = [None] * 3 + + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'duration': duration, + 'timestamp': timestamp, + 'view_count': view_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/alphaporno.py b/youtube_dl/extractor/alphaporno.py new file mode 100644 index 000000000..3a6d99f6b --- /dev/null +++ b/youtube_dl/extractor/alphaporno.py @@ -0,0 +1,77 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + parse_duration, + parse_filesize, + int_or_none, +) + + +class AlphaPornoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P[^/]+)' + _TEST = { + 'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/', + 'md5': 'feb6d3bba8848cd54467a87ad34bd38e', + 'info_dict': { + 'id': '258807', + 'display_id': 'sensual-striptease-porn-with-samantha-alexandra', + 'ext': 'mp4', + 'title': 'Sensual striptease porn with Samantha Alexandra', + 'thumbnail': r're:https?://.*\.jpg$', + 'timestamp': 1418694611, + 'upload_date': '20141216', + 
'duration': 387, + 'filesize_approx': 54120000, + 'tbr': 1145, + 'categories': list, + 'age_limit': 18, + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None) + + video_url = self._search_regex( + r"video_url\s*:\s*'([^']+)'", webpage, 'video url') + ext = self._html_search_meta( + 'encodingFormat', webpage, 'ext', default='.mp4')[1:] + + title = self._search_regex( + [r'', + r'class="title" itemprop="name">([^<]+)<'], + webpage, 'title') + thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail') + timestamp = parse_iso8601(self._html_search_meta( + 'uploadDate', webpage, 'upload date')) + duration = parse_duration(self._html_search_meta( + 'duration', webpage, 'duration')) + filesize_approx = parse_filesize(self._html_search_meta( + 'contentSize', webpage, 'file size')) + bitrate = int_or_none(self._html_search_meta( + 'bitrate', webpage, 'bitrate')) + categories = self._html_search_meta( + 'keywords', webpage, 'categories', default='').split(',') + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_url, + 'ext': ext, + 'title': title, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'filesize_approx': filesize_approx, + 'tbr': bitrate, + 'categories': categories, + 'age_limit': age_limit, + } diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dl/extractor/amcnetworks.py new file mode 100644 index 000000000..6fb3d6c53 --- /dev/null +++ b/youtube_dl/extractor/amcnetworks.py @@ -0,0 +1,118 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .theplatform import ThePlatformIE +from ..utils import ( + int_or_none, + parse_age_limit, + try_get, + update_url_query, +) + + +class AMCNetworksIE(ThePlatformIE): + _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1', + 'md5': '', + 'info_dict': { + 'id': 's3MX01Nl4vPH', + 'ext': 'mp4', + 'title': 'Maron - Season 4 - Step 1', + 'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. 
Starring Marc Maron and Constance Zimmer.', + 'age_limit': 17, + 'upload_date': '20160505', + 'timestamp': 1462468831, + 'uploader': 'AMCN', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'Requires TV provider accounts', + }, { + 'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge', + 'only_matching': True, + }, { + 'url': 'http://www.amc.com/shows/preacher/full-episodes/season-01/episode-00/pilot', + 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/million-dollar-matchmaker/season-01/episode-06-the-dumped-dj-and-shallow-hal', + 'only_matching': True, + }, { + 'url': 'http://www.ifc.com/movies/chaos', + 'only_matching': True, + }, { + 'url': 'http://www.bbcamerica.com/shows/doctor-who/full-episodes/the-power-of-the-daleks/episode-01-episode-1-color-version', + 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/mama-june-from-not-to-hot/full-episode/season-01/thin-tervention', + 'only_matching': True, + }, { + 'url': 'http://www.wetv.com/shows/la-hair/videos/season-05/episode-09-episode-9-2/episode-9-sneak-peek-3', + 'only_matching': True, + }, { + 'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + query = { + 'mbr': 'true', + 'manifest': 'm3u', + } + media_url = self._search_regex( + r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)', + webpage, 'media url') + theplatform_metadata = self._download_theplatform_metadata(self._search_regex( + r'link\.theplatform\.com/s/([^?]+)', + media_url, 'theplatform_path'), display_id) + info = self._parse_theplatform_metadata(theplatform_metadata) + video_id = theplatform_metadata['pid'] + title = theplatform_metadata['title'] + rating = try_get( + theplatform_metadata, lambda x: x['ratings'][0]['rating']) + auth_required = self._search_regex( + r'window\.authRequired\s*=\s*(true|false);', + webpage, 'auth required') + if auth_required == 'true': + requestor_id = self._search_regex( + r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)', + webpage, 'requestor id') + resource = self._get_mvpd_resource( + requestor_id, title, video_id, rating) + query['auth'] = self._extract_mvpd_auth( + url, video_id, requestor_id, resource) + media_url = update_url_query(media_url, query) + formats, subtitles = self._extract_theplatform_smil( + media_url, video_id) + self._sort_formats(formats) + info.update({ + 'id': video_id, + 'subtitles': subtitles, + 'formats': formats, + 'age_limit': parse_age_limit(parse_age_limit(rating)), + }) + ns_keys = theplatform_metadata.get('$xmlns', {}).keys() + if ns_keys: + ns = list(ns_keys)[0] + series = theplatform_metadata.get(ns + '$show') + season_number = int_or_none( + theplatform_metadata.get(ns + '$season')) + episode = theplatform_metadata.get(ns + '$episodeTitle') + episode_number = int_or_none( + theplatform_metadata.get(ns + '$episode')) + if season_number: + title = 'Season %d - %s' % (season_number, title) + if series: + title = '%s - %s' % (series, title) + info.update({ + 'title': title, + 'series': series, + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + }) + return info diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py new file mode 100644 index 000000000..8b32aa886 --- /dev/null +++ 
b/youtube_dl/extractor/americastestkitchen.py @@ -0,0 +1,92 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + try_get, + unified_strdate, +) + + +class AmericasTestKitchenIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party', + 'md5': 'b861c3e365ac38ad319cfd509c30577f', + 'info_dict': { + 'id': '1_5g5zua6e', + 'title': 'Summer Dinner Party', + 'ext': 'mp4', + 'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec', + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1497285541, + 'upload_date': '20170612', + 'uploader_id': 'roger.metcalf@americastestkitchen.com', + 'release_date': '20170617', + 'series': "America's Test Kitchen", + 'season_number': 17, + 'episode': 'Summer Dinner Party', + 'episode_number': 24, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + video_data = self._parse_json( + self._search_regex( + r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*', + webpage, 'initial context'), + video_id) + + ep_data = try_get( + video_data, + (lambda x: x['episodeDetail']['content']['data'], + lambda x: x['videoDetail']['content']['data']), dict) + ep_meta = ep_data.get('full_video', {}) + + zype_id = ep_meta.get('zype_id') + if zype_id: + embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id + ie_key = 'Zype' + else: + partner_id = self._search_regex( + r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', + webpage, 'kaltura partner id') + external_id = ep_data.get('external_id') or ep_meta['external_id'] + embed_url = 'kaltura:%s:%s' % (partner_id, external_id) + ie_key = 'Kaltura' + + title = ep_data.get('title') or ep_meta.get('title') + description = clean_html(ep_meta.get('episode_description') or ep_data.get( + 'description') or ep_meta.get('description')) + thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url']) + release_date = unified_strdate(ep_data.get('aired_at')) + + season_number = int_or_none(ep_meta.get('season_number')) + episode = ep_meta.get('title') + episode_number = int_or_none(ep_meta.get('episode_number')) + + return { + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': ie_key, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'release_date': release_date, + 'series': "America's Test Kitchen", + 'season_number': season_number, + 'episode': episode, + 'episode_number': episode_number, + } diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py new file mode 100644 index 000000000..7ff098cfa --- /dev/null +++ b/youtube_dl/extractor/amp.py @@ -0,0 +1,102 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + mimetype2ext, + parse_iso8601, + url_or_none, +) + + +class AMPIE(InfoExtractor): + # parse Akamai Adaptive Media Player feed + def _extract_feed_info(self, url): + feed = self._download_json( + url, None, 'Downloading Akamai AMP feed', + 'Unable to download Akamai AMP feed') + item = 
feed.get('channel', {}).get('item') + if not item: + raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error'])) + + video_id = item['guid'] + + def get_media_node(name, default=None): + media_name = 'media-%s' % name + media_group = item.get('media-group') or item + return media_group.get(media_name) or item.get(media_name) or item.get(name, default) + + thumbnails = [] + media_thumbnail = get_media_node('thumbnail') + if media_thumbnail: + if isinstance(media_thumbnail, dict): + media_thumbnail = [media_thumbnail] + for thumbnail_data in media_thumbnail: + thumbnail = thumbnail_data.get('@attributes', {}) + thumbnail_url = url_or_none(thumbnail.get('url')) + if not thumbnail_url: + continue + thumbnails.append({ + 'url': self._proto_relative_url(thumbnail_url, 'http:'), + 'width': int_or_none(thumbnail.get('width')), + 'height': int_or_none(thumbnail.get('height')), + }) + + subtitles = {} + media_subtitle = get_media_node('subTitle') + if media_subtitle: + if isinstance(media_subtitle, dict): + media_subtitle = [media_subtitle] + for subtitle_data in media_subtitle: + subtitle = subtitle_data.get('@attributes', {}) + subtitle_href = url_or_none(subtitle.get('href')) + if not subtitle_href: + continue + subtitles.setdefault(subtitle.get('lang') or 'en', []).append({ + 'url': subtitle_href, + 'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href), + }) + + formats = [] + media_content = get_media_node('content') + if isinstance(media_content, dict): + media_content = [media_content] + for media_data in media_content: + media = media_data.get('@attributes', {}) + media_url = url_or_none(media.get('url')) + if not media_url: + continue + ext = mimetype2ext(media.get('type')) or determine_ext(media_url) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124', + video_id, f4m_id='hds', fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'), + 'url': media_url, + 'tbr': int_or_none(media.get('bitrate')), + 'filesize': int_or_none(media.get('fileSize')), + 'ext': ext, + }) + + self._sort_formats(formats) + + timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date')) + + return { + 'id': video_id, + 'title': get_media_node('title'), + 'description': get_media_node('description'), + 'thumbnails': thumbnails, + 'timestamp': timestamp, + 'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')), + 'subtitles': subtitles, + 'formats': formats, + } diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dl/extractor/animeondemand.py new file mode 100644 index 000000000..00ce684d1 --- /dev/null +++ b/youtube_dl/extractor/animeondemand.py @@ -0,0 +1,293 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + extract_attributes, + ExtractorError, + url_or_none, + urlencode_postdata, + urljoin, +) + + +class AnimeOnDemandIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?anime-on-demand\.de/anime/(?P\d+)' + _LOGIN_URL = 'https://www.anime-on-demand.de/users/sign_in' + _APPLY_HTML5_URL = 'https://www.anime-on-demand.de/html5apply' + _NETRC_MACHINE = 'animeondemand' + # German-speaking countries of Europe + _GEO_COUNTRIES = ['AT', 'CH', 'DE', 'LI', 
'LU'] + _TESTS = [{ + # jap, OmU + 'url': 'https://www.anime-on-demand.de/anime/161', + 'info_dict': { + 'id': '161', + 'title': 'Grimgar, Ashes and Illusions (OmU)', + 'description': 'md5:6681ce3c07c7189d255ac6ab23812d31', + }, + 'playlist_mincount': 4, + }, { + # Film wording is used instead of Episode, ger/jap, Dub/OmU + 'url': 'https://www.anime-on-demand.de/anime/39', + 'only_matching': True, + }, { + # Episodes without titles, jap, OmU + 'url': 'https://www.anime-on-demand.de/anime/162', + 'only_matching': True, + }, { + # ger/jap, Dub/OmU, account required + 'url': 'https://www.anime-on-demand.de/anime/169', + 'only_matching': True, + }, { + # Full length film, non-series, ger/jap, Dub/OmU, account required + 'url': 'https://www.anime-on-demand.de/anime/185', + 'only_matching': True, + }, { + # Flash videos + 'url': 'https://www.anime-on-demand.de/anime/12', + 'only_matching': True, + }] + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + if '>Our licensing terms allow the distribution of animes only to German-speaking countries of Europe' in login_page: + self.raise_geo_restricted( + '%s is only available in German-speaking countries of Europe' % self.IE_NAME) + + login_form = self._form_hidden_inputs('new_user', login_page) + + login_form.update({ + 'user[login]': username, + 'user[password]': password, + }) + + post_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, + 'post url', default=self._LOGIN_URL, group='url') + + if not post_url.startswith('http'): + post_url = urljoin(self._LOGIN_URL, post_url) + + response = self._download_webpage( + post_url, None, 'Logging in', + data=urlencode_postdata(login_form), headers={ + 'Referer': self._LOGIN_URL, + }) + + if all(p not in response for p in ('>Logout<', 'href="/users/sign_out"')): + error = self._search_regex( + r'<div[^>]+\bclass=(["\'])(?:(?!\1).)*\balert\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>', + response, 'error', default=None, group='error') + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + anime_id = self._match_id(url) + + webpage = self._download_webpage(url, anime_id) + + if 'data-playlist=' not in webpage: + self._download_webpage( + self._APPLY_HTML5_URL, anime_id, + 'Activating HTML5 beta', 'Unable to apply HTML5 beta') + webpage = self._download_webpage(url, anime_id) + + csrf_token = self._html_search_meta( + 'csrf-token', webpage, 'csrf token', fatal=True) + + anime_title = self._html_search_regex( + r'(?s)<h1[^>]+itemprop="name"[^>]*>(.+?)</h1>', + webpage, 'anime name') + anime_description = self._html_search_regex( + r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)
  • ', + webpage, 'anime description', default=None) + + entries = [] + + def extract_info(html, video_id, num=None): + title, description = [None] * 2 + formats = [] + + for input_ in re.findall( + r']+class=["\'].*?streamstarter[^>]+>', html): + attributes = extract_attributes(input_) + title = attributes.get('data-dialog-header') + playlist_urls = [] + for playlist_key in ('data-playlist', 'data-otherplaylist', 'data-stream'): + playlist_url = attributes.get(playlist_key) + if isinstance(playlist_url, compat_str) and re.match( + r'/?[\da-zA-Z]+', playlist_url): + playlist_urls.append(attributes[playlist_key]) + if not playlist_urls: + continue + + lang = attributes.get('data-lang') + lang_note = attributes.get('value') + + for playlist_url in playlist_urls: + kind = self._search_regex( + r'videomaterialurl/\d+/([^/]+)/', + playlist_url, 'media kind', default=None) + format_id_list = [] + if lang: + format_id_list.append(lang) + if kind: + format_id_list.append(kind) + if not format_id_list and num is not None: + format_id_list.append(compat_str(num)) + format_id = '-'.join(format_id_list) + format_note = ', '.join(filter(None, (kind, lang_note))) + item_id_list = [] + if format_id: + item_id_list.append(format_id) + item_id_list.append('videomaterial') + playlist = self._download_json( + urljoin(url, playlist_url), video_id, + 'Downloading %s JSON' % ' '.join(item_id_list), + headers={ + 'X-Requested-With': 'XMLHttpRequest', + 'X-CSRF-Token': csrf_token, + 'Referer': url, + 'Accept': 'application/json, text/javascript, */*; q=0.01', + }, fatal=False) + if not playlist: + continue + stream_url = url_or_none(playlist.get('streamurl')) + if stream_url: + rtmp = re.search( + r'^(?Prtmpe?://(?P[^/]+)/(?P.+/))(?Pmp[34]:.+)', + stream_url) + if rtmp: + formats.append({ + 'url': rtmp.group('url'), + 'app': rtmp.group('app'), + 'play_path': rtmp.group('playpath'), + 'page_url': url, + 'player_url': 'https://www.anime-on-demand.de/assets/jwplayer.flash-55abfb34080700304d49125ce9ffb4a6.swf', + 'rtmp_real_time': True, + 'format_id': 'rtmp', + 'ext': 'flv', + }) + continue + start_video = playlist.get('startvideo', 0) + playlist = playlist.get('playlist') + if not playlist or not isinstance(playlist, list): + continue + playlist = playlist[start_video] + title = playlist.get('title') + if not title: + continue + description = playlist.get('description') + for source in playlist.get('sources', []): + file_ = source.get('file') + if not file_: + continue + ext = determine_ext(file_) + format_id_list = [lang, kind] + if ext == 'm3u8': + format_id_list.append('hls') + elif source.get('type') == 'video/dash' or ext == 'mpd': + format_id_list.append('dash') + format_id = '-'.join(filter(None, format_id_list)) + if ext == 'm3u8': + file_formats = self._extract_m3u8_formats( + file_, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) + elif source.get('type') == 'video/dash' or ext == 'mpd': + continue + file_formats = self._extract_mpd_formats( + file_, video_id, mpd_id=format_id, fatal=False) + else: + continue + for f in file_formats: + f.update({ + 'language': lang, + 'format_note': format_note, + }) + formats.extend(file_formats) + + return { + 'title': title, + 'description': description, + 'formats': formats, + } + + def extract_entries(html, video_id, common_info, num=None): + info = extract_info(html, video_id, num) + + if info['formats']: + self._sort_formats(info['formats']) + f = common_info.copy() + f.update(info) + entries.append(f) + + # Extract teaser/trailer 
only when full episode is not available + if not info['formats']: + m = re.search( + r'data-dialog-header=(["\'])(?P.+?)\1[^>]+href=(["\'])(?P<href>.+?)\3[^>]*>(?P<kind>Teaser|Trailer)<', + html) + if m: + f = common_info.copy() + f.update({ + 'id': '%s-%s' % (f['id'], m.group('kind').lower()), + 'title': m.group('title'), + 'url': urljoin(url, m.group('href')), + }) + entries.append(f) + + def extract_episodes(html): + for num, episode_html in enumerate(re.findall( + r'(?s)<h3[^>]+class="episodebox-title".+?>Episodeninhalt<', html), 1): + episodebox_title = self._search_regex( + (r'class="episodebox-title"[^>]+title=(["\'])(?P<title>.+?)\1', + r'class="episodebox-title"[^>]+>(?P<title>.+?)<'), + episode_html, 'episodebox title', default=None, group='title') + if not episodebox_title: + continue + + episode_number = int(self._search_regex( + r'(?:Episode|Film)\s*(\d+)', + episodebox_title, 'episode number', default=num)) + episode_title = self._search_regex( + r'(?:Episode|Film)\s*\d+\s*-\s*(.+)', + episodebox_title, 'episode title', default=None) + + video_id = 'episode-%d' % episode_number + + common_info = { + 'id': video_id, + 'series': anime_title, + 'episode': episode_title, + 'episode_number': episode_number, + } + + extract_entries(episode_html, video_id, common_info) + + def extract_film(html, video_id): + common_info = { + 'id': anime_id, + 'title': anime_title, + 'description': anime_description, + } + extract_entries(html, video_id, common_info) + + extract_episodes(webpage) + + if not entries: + extract_film(webpage, anime_id) + + return self.playlist_result(entries, anime_id, anime_title, anime_description) diff --git a/youtube_dl/extractor/anvato.py b/youtube_dl/extractor/anvato.py new file mode 100644 index 000000000..84e841035 --- /dev/null +++ b/youtube_dl/extractor/anvato.py @@ -0,0 +1,314 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import hashlib +import json +import random +import re +import time + +from .common import InfoExtractor +from ..aes import aes_encrypt +from ..compat import compat_str +from ..utils import ( + bytes_to_intlist, + determine_ext, + intlist_to_bytes, + int_or_none, + strip_jsonp, + unescapeHTML, + unsmuggle_url, +) + + +def md5_text(s): + if not isinstance(s, compat_str): + s = compat_str(s) + return hashlib.md5(s.encode('utf-8')).hexdigest() + + +class AnvatoIE(InfoExtractor): + _VALID_URL = r'anvato:(?P<access_key_or_mcp>[^:]+):(?P<id>\d+)' + + # Copied from anvplayer.min.js + _ANVACK_TABLE = { + 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ', + 'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA', + 'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP', + 'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv', + 'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7', + 'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR', + 'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg', + 'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto', + 'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 
'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY', + 'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh', + 'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK', + 'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D', + 'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad', + 'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp', + 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih', + 'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR', + 'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW', + 'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su', + 'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q', + 'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5', + 'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3', + 'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI', + 'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s', + 'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz', + 'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg', + 'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x', + 'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH', + 'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX', + 'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc', + 'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK', + 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7', + 'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C', + 'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e', + 'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1', + 'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re', + 'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51', + 'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho', + 'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9', + 'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 
'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH', + 'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F', + 'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo', + 'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR', + 'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa', + 'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk', + 'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ', + 'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ', + 'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m', + 'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b', + 'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3', + 'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK', + 'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', + 'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua', + 'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F', + 'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx', + 'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ', + 'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH', + 'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm', + 'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt', + 'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl', + 'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b', + 'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV', + 'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg', + 'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk', + 'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT', + 'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa', + 'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv', + 'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k', + 'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI', + 'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 
'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr', + 'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw', + 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K', + 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH', + 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK', + 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu', + 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', + 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg', + 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK', + 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n', + 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD', + 'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk', + 'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn', + 'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W', + 'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ', + 'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ' + } + + _MCP_TO_ACCESS_KEY_TABLE = { + 'qa': 'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922', + 'lin': 'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749', + 'univison': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa', + 'uni': 'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa', + 'dev': 'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a', + 'sps': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336', + 'spsstg': 'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336', + 'anv': 'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3', + 'gray': 'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900', + 'hearst': 'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99', + 'cbs': 'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe', + 'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582' + } + + _API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA' + + _ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1' + _AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce' + + _TESTS = [{ + # from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874 + 'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496', + 'info_dict': { + 'id': '4465496', + 'ext': 'mp4', + 'title': 'VIDEO: Humpback whale breaches right next to NH boat', + 'description': 'VIDEO: Humpback whale breaches right next to NH boat. 
Footage courtesy: Zach Fahey.', + 'duration': 22, + 'timestamp': 1534855680, + 'upload_date': '20180821', + 'uploader': 'ANV', + }, + 'params': { + 'skip_download': True, + }, + }, { + # from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/ + 'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601', + 'only_matching': True, + }] + + def __init__(self, *args, **kwargs): + super(AnvatoIE, self).__init__(*args, **kwargs) + self.__server_time = None + + def _server_time(self, access_key, video_id): + if self.__server_time is not None: + return self.__server_time + + self.__server_time = int(self._download_json( + self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id, + note='Fetching server time')['server_time']) + + return self.__server_time + + def _api_prefix(self, access_key): + return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage') + + def _get_video_json(self, access_key, video_id): + # See et() in anvplayer.min.js, which is an alias of getVideoJSON() + video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key) + server_time = self._server_time(access_key, video_id) + input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time)) + + auth_secret = intlist_to_bytes(aes_encrypt( + bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY))) + + video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii') + anvrid = md5_text(time.time() * 1000 * random.random())[:30] + payload = { + 'api': { + 'anvrid': anvrid, + 'anvstk': md5_text('%s|%s|%d|%s' % ( + access_key, anvrid, server_time, + self._ANVACK_TABLE.get(access_key, self._API_KEY))), + 'anvts': server_time, + }, + } + + return self._download_json( + video_data_url, video_id, transform_source=strip_jsonp, + data=json.dumps(payload).encode('utf-8')) + + def _get_anvato_videos(self, access_key, video_id): + video_data = self._get_video_json(access_key, video_id) + + formats = [] + for published_url in video_data['published_urls']: + video_url = published_url['embed_url'] + media_format = published_url.get('format') + ext = determine_ext(video_url) + + if ext == 'smil' or media_format == 'smil': + formats.extend(self._extract_smil_formats(video_url, video_id)) + continue + + tbr = int_or_none(published_url.get('kbps')) + a_format = { + 'url': video_url, + 'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(), + 'tbr': tbr if tbr != 0 else None, + } + + if media_format == 'm3u8' and tbr is not None: + a_format.update({ + 'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])), + 'ext': 'mp4', + }) + elif media_format == 'm3u8-variant' or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue + elif ext == 'mp3' or media_format == 'mp3': + a_format['vcodec'] = 'none' + else: + a_format.update({ + 'width': int_or_none(published_url.get('width')), + 'height': int_or_none(published_url.get('height')), + }) + formats.append(a_format) + + self._sort_formats(formats) + + subtitles = {} + for caption in video_data.get('captions', []): + a_caption = { + 'url': caption['url'], + 'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None + } + subtitles.setdefault(caption['language'], []).append(a_caption) + + return { + 'id': video_id, + 'formats': formats, + 'title': 
video_data.get('def_title'), + 'description': video_data.get('def_description'), + 'tags': video_data.get('def_tags', '').split(','), + 'categories': video_data.get('categories'), + 'thumbnail': video_data.get('thumbnail'), + 'timestamp': int_or_none(video_data.get( + 'ts_published') or video_data.get('ts_added')), + 'uploader': video_data.get('mcp_id'), + 'duration': int_or_none(video_data.get('duration')), + 'subtitles': subtitles, + } + + @staticmethod + def _extract_urls(ie, webpage, video_id): + entries = [] + for mobj in re.finditer(AnvatoIE._ANVP_RE, webpage): + anvplayer_data = ie._parse_json( + mobj.group('anvp'), video_id, transform_source=unescapeHTML, + fatal=False) + if not anvplayer_data: + continue + video = anvplayer_data.get('video') + if not isinstance(video, compat_str) or not video.isdigit(): + continue + access_key = anvplayer_data.get('accessKey') + if not access_key: + mcp = anvplayer_data.get('mcp') + if mcp: + access_key = AnvatoIE._MCP_TO_ACCESS_KEY_TABLE.get( + mcp.lower()) + if not access_key: + continue + entries.append(ie.url_result( + 'anvato:%s:%s' % (access_key, video), ie=AnvatoIE.ie_key(), + video_id=video)) + return entries + + def _extract_anvato_videos(self, webpage, video_id): + anvplayer_data = self._parse_json( + self._html_search_regex( + self._ANVP_RE, webpage, 'Anvato player data', group='anvp'), + video_id) + return self._get_anvato_videos( + anvplayer_data['accessKey'], anvplayer_data['video']) + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass({ + 'countries': smuggled_data.get('geo_countries'), + }) + + mobj = re.match(self._VALID_URL, url) + access_key, video_id = mobj.group('access_key_or_mcp', 'id') + if access_key not in self._ANVACK_TABLE: + access_key = self._MCP_TO_ACCESS_KEY_TABLE.get( + access_key) or access_key + return self._get_anvato_videos(access_key, video_id) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py new file mode 100644 index 000000000..e87994a6a --- /dev/null +++ b/youtube_dl/extractor/aol.py @@ -0,0 +1,133 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) +from ..utils import ( + ExtractorError, + int_or_none, + url_or_none, +) + + +class AolIE(InfoExtractor): + IE_NAME = 'aol.com' + _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)' + + _TESTS = [{ + # video with 5min ID + 'url': 'https://www.aol.com/video/view/u-s--official-warns-of-largest-ever-irs-phone-scam/518167793/', + 'md5': '18ef68f48740e86ae94b98da815eec42', + 'info_dict': { + 'id': '518167793', + 'ext': 'mp4', + 'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam', + 'description': 'A major phone scam has cost thousands of taxpayers more than $1 million, with less than a month until income tax returns are due to the IRS.', + 'timestamp': 1395405060, + 'upload_date': '20140321', + 'uploader': 'Newsy Studio', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + # video with vidible ID + 'url': 'https://www.aol.com/video/view/netflix-is-raising-rates/5707d6b8e4b090497b04f706/', + 'info_dict': { + 'id': '5707d6b8e4b090497b04f706', + 'ext': 'mp4', + 'title': 'Netflix is Raising Rates', + 'description': 'Netflix is rewarding millions of it’s long-standing members with an increase in cost. 
Veuer’s Carly Figueroa has more.', + 'upload_date': '20160408', + 'timestamp': 1460123280, + 'uploader': 'Veuer', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'https://www.aol.com/video/view/park-bench-season-2-trailer/559a1b9be4b0c3bfad3357a7/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.com/video/view/donald-trump-spokeswoman-tones-down-megyn-kelly-attacks/519442220/', + 'only_matching': True, + }, { + 'url': 'aol-video:5707d6b8e4b090497b04f706', + 'only_matching': True, + }, { + 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + response = self._download_json( + 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, + video_id)['response'] + if response['statusText'] != 'Ok': + raise ExtractorError('%s said: %s' % (self.IE_NAME, response['statusText']), expected=True) + + video_data = response['data'] + formats = [] + m3u8_url = url_or_none(video_data.get('videoMasterPlaylist')) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + for rendition in video_data.get('renditions', []): + video_url = url_or_none(rendition.get('url')) + if not video_url: + continue + ext = rendition.get('format') + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + f = { + 'url': video_url, + 'format_id': rendition.get('quality'), + } + mobj = re.search(r'(\d+)x(\d+)', video_url) + if mobj: + f.update({ + 'width': int(mobj.group(1)), + 'height': int(mobj.group(2)), + }) + else: + qs = compat_parse_qs(compat_urllib_parse_urlparse(video_url).query) + f.update({ + 'width': int_or_none(qs.get('w', [None])[0]), + 'height': int_or_none(qs.get('h', [None])[0]), + }) + formats.append(f) + self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id')) + + return { + 'id': video_id, + 'title': video_data['title'], + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': int_or_none(video_data.get('publishDate')), + 'view_count': int_or_none(video_data.get('views')), + 'description': video_data.get('description'), + 'uploader': video_data.get('videoOwner'), + 'formats': formats, + } diff --git a/youtube_dl/extractor/apa.py b/youtube_dl/extractor/apa.py new file mode 100644 index 000000000..98ccdaa4a --- /dev/null +++ b/youtube_dl/extractor/apa.py @@ -0,0 +1,94 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + js_to_json, + url_or_none, +) + + +class APAIE(InfoExtractor): + _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TESTS = [{ + 'url': 
'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029', + 'md5': '2b12292faeb0a7d930c778c7a5b4759b', + 'info_dict': { + 'id': 'jjv85FdZ', + 'ext': 'mp4', + 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 254, + 'timestamp': 1519211149, + 'upload_date': '20180221', + }, + }, { + 'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78', + 'only_matching': True, + }, { + 'url': 'http://uvp-rma.sf.apa.at/embed/70404cca-2f47-4855-bbb8-20b1fae58f76', + 'only_matching': True, + }, { + 'url': 'http://uvp-kleinezeitung.sf.apa.at/embed/f1c44979-dba2-4ebf-b021-e4cf2cac3c81', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//[^/]+\.apa\.at/embed/[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}.*?)\1', + webpage)] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + jwplatform_id = self._search_regex( + r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage, + 'jwplatform id', default=None) + + if jwplatform_id: + return self.url_result( + 'jwplatform:' + jwplatform_id, ie='JWPlatform', + video_id=video_id) + + sources = self._parse_json( + self._search_regex( + r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'), + video_id, transform_source=js_to_json) + + formats = [] + for source in sources: + if not isinstance(source, dict): + continue + source_url = url_or_none(source.get('file')) + if not source_url: + continue + ext = determine_ext(source_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + source_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': source_url, + }) + self._sort_formats(formats) + + thumbnail = self._search_regex( + r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'thumbnail', fatal=False, group='url') + + return { + 'id': video_id, + 'title': video_id, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py new file mode 100644 index 000000000..883dcee7a --- /dev/null +++ b/youtube_dl/extractor/aparat.py @@ -0,0 +1,95 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + merge_dicts, + mimetype2ext, + url_or_none, +) + + +class AparatIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)' + + _TESTS = [{ + 'url': 'http://www.aparat.com/v/wP8On', + 'md5': '131aca2e14fe7c4dcb3c4877ba300c89', + 'info_dict': { + 'id': 'wP8On', + 'ext': 'mp4', + 'title': 'تیم گلکسی 11 - زومیت', + 'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028', + 'duration': 231, + 'timestamp': 1387394859, + 'upload_date': '20131218', + 'view_count': int, + }, + }, { + # multiple formats + 'url': 'https://www.aparat.com/v/8dflw/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + # Provides more metadata + webpage = self._download_webpage(url, video_id, fatal=False) + + if not webpage: + # Note: There is an easier-to-parse configuration at + # http://www.aparat.com/video/video/config/videohash/%video_id + # but the URL in there does not work + webpage = 
self._download_webpage( + 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, + video_id) + + options = self._parse_json( + self._search_regex( + r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)', + webpage, 'options', group='value'), + video_id) + + player = options['plugins']['sabaPlayerPlugin'] + + formats = [] + for sources in player['multiSRC']: + for item in sources: + if not isinstance(item, dict): + continue + file_url = url_or_none(item.get('src')) + if not file_url: + continue + item_type = item.get('type') + if item_type == 'application/vnd.apple.mpegurl': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + else: + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': 'http-%s' % (label or ext), + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', + default=None)), + }) + self._sort_formats( + formats, field_preference=('height', 'width', 'tbr', 'format_id')) + + info = self._search_json_ld(webpage, video_id, default={}) + + if not info.get('title'): + info['title'] = player['title'] + + return merge_dicts(info, { + 'id': video_id, + 'thumbnail': url_or_none(options.get('poster')), + 'duration': int_or_none(player.get('duration')), + 'formats': formats, + }) diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dl/extractor/appleconnect.py new file mode 100644 index 000000000..a84b8b1eb --- /dev/null +++ b/youtube_dl/extractor/appleconnect.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + str_to_int, + ExtractorError +) + + +class AppleConnectIE(InfoExtractor): + _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)' + _TEST = { + 'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3', + 'md5': 'e7c38568a01ea45402570e6029206723', + 'info_dict': { + 'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3', + 'ext': 'm4v', + 'title': 'Energy', + 'uploader': 'Drake', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20150710', + 'timestamp': 1436545535, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + try: + video_json = self._html_search_regex( + r'class="auc-video-data">(\{.*?\})', webpage, 'json') + except ExtractorError: + raise ExtractorError('This post doesn\'t contain a video', expected=True) + + video_data = self._parse_json(video_json, video_id) + timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp')) + like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count')) + + return { + 'id': video_id, + 'url': video_data['sslSrc'], + 'title': video_data['title'], + 'description': video_data['description'], + 'uploader': video_data['artistName'], + 'thumbnail': video_data['artworkUrl'], + 'timestamp': timestamp, + 'like_count': like_count, + } diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py new file mode 100644 index 000000000..a9ef733e0 --- /dev/null +++ b/youtube_dl/extractor/appletrailers.py @@ -0,0 +1,283 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + 
parse_duration, + unified_strdate, +) + + +class AppleTrailersIE(InfoExtractor): + IE_NAME = 'appletrailers' + _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' + _TESTS = [{ + 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', + 'info_dict': { + 'id': '5111', + 'title': 'Man of Steel', + }, + 'playlist': [ + { + 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8', + 'info_dict': { + 'id': 'manofsteel-trailer4', + 'ext': 'mov', + 'duration': 111, + 'title': 'Trailer 4', + 'upload_date': '20130523', + 'uploader_id': 'wb', + }, + }, + { + 'md5': 'b8017b7131b721fb4e8d6f49e1df908c', + 'info_dict': { + 'id': 'manofsteel-trailer3', + 'ext': 'mov', + 'duration': 182, + 'title': 'Trailer 3', + 'upload_date': '20130417', + 'uploader_id': 'wb', + }, + }, + { + 'md5': 'd0f1e1150989b9924679b441f3404d48', + 'info_dict': { + 'id': 'manofsteel-trailer', + 'ext': 'mov', + 'duration': 148, + 'title': 'Trailer', + 'upload_date': '20121212', + 'uploader_id': 'wb', + }, + }, + { + 'md5': '5fe08795b943eb2e757fa95cb6def1cb', + 'info_dict': { + 'id': 'manofsteel-teaser', + 'ext': 'mov', + 'duration': 93, + 'title': 'Teaser', + 'upload_date': '20120721', + 'uploader_id': 'wb', + }, + }, + ] + }, { + 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', + 'info_dict': { + 'id': '4489', + 'title': 'Blackthorn', + }, + 'playlist_mincount': 2, + 'expected_warnings': ['Unable to download JSON metadata'], + }, { + # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json + 'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/', + 'info_dict': { + 'id': '15881', + 'title': 'Kung Fu Panda 3', + }, + 'playlist_mincount': 4, + }, { + 'url': 'http://trailers.apple.com/ca/metropole/autrui/', + 'only_matching': True, + }, { + 'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/', + 'only_matching': True, + }] + + _JSON_RE = r'iTunes.playURL\((.*?)\);' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + movie = mobj.group('movie') + uploader_id = mobj.group('company') + + webpage = self._download_webpage(url, movie) + film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') + film_data = self._download_json( + 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, + film_id, fatal=False) + + if film_data: + entries = [] + for clip in film_data.get('clips', []): + clip_title = clip['title'] + + formats = [] + for version, version_data in clip.get('versions', {}).items(): + for size, size_data in version_data.get('sizes', {}).items(): + src = size_data.get('src') + if not src: + continue + formats.append({ + 'format_id': '%s-%s' % (version, size), + 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src), + 'width': int_or_none(size_data.get('width')), + 'height': int_or_none(size_data.get('height')), + 'language': version[:2], + }) + self._sort_formats(formats) + + entries.append({ + 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), + 'formats': formats, + 'title': clip_title, + 'thumbnail': clip.get('screen') or clip.get('thumb'), + 'duration': parse_duration(clip.get('runtime') or clip.get('faded')), + 'upload_date': unified_strdate(clip.get('posted')), + 'uploader_id': uploader_id, + }) + + page_data = film_data.get('page', {}) + return self.playlist_result(entries, film_id, page_data.get('movie_title')) + + playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') + + def fix_html(s): + s = 
re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s) + s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s) + # The ' in the onClick attributes are not escaped, it couldn't be parsed + # like: http://trailers.apple.com/trailers/wb/gravity/ + + def _clean_json(m): + return 'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + s = re.sub(self._JSON_RE, _clean_json, s) + s = '<html>%s</html>' % s + return s + doc = self._download_xml(playlist_url, movie, transform_source=fix_html) + + playlist = [] + for li in doc.findall('./div/ul/li'): + on_click = li.find('.//a').attrib['onClick'] + trailer_info_json = self._search_regex(self._JSON_RE, + on_click, 'trailer info') + trailer_info = json.loads(trailer_info_json) + first_url = trailer_info.get('url') + if not first_url: + continue + title = trailer_info['title'] + video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower() + thumbnail = li.find('.//img').attrib['src'] + upload_date = trailer_info['posted'].replace('-', '') + + runtime = trailer_info['runtime'] + m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime) + duration = None + if m: + duration = 60 * int(m.group('minutes')) + int(m.group('seconds')) + + trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower() + settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id) + settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json') + + formats = [] + for format in settings['metadata']['sizes']: + # The src is a file pointing to the real video file + format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src']) + formats.append({ + 'url': format_url, + 'format': format['type'], + 'width': int_or_none(format['width']), + 'height': int_or_none(format['height']), + }) + + self._sort_formats(formats) + + playlist.append({ + '_type': 'video', + 'id': video_id, + 'formats': formats, + 'title': title, + 'duration': duration, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'http_headers': { + 'User-Agent': 'QuickTime compatible (youtube-dl)', + }, + }) + + return { + '_type': 'playlist', + 'id': movie, + 'entries': playlist, + } + + +class AppleTrailersSectionIE(InfoExtractor): + IE_NAME = 'appletrailers:section' + _SECTIONS = { + 'justadded': { + 'feed_path': 'just_added', + 'title': 'Just Added', + }, + 'exclusive': { + 'feed_path': 'exclusive', + 'title': 'Exclusive', + }, + 'justhd': { + 'feed_path': 'just_hd', + 'title': 'Just HD', + }, + 'mostpopular': { + 'feed_path': 'most_pop', + 'title': 'Most Popular', + }, + 'moviestudios': { + 'feed_path': 'studios', + 'title': 'Movie Studios', + }, + } + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS) + _TESTS = [{ + 'url': 'http://trailers.apple.com/#section=justadded', + 'info_dict': { + 'title': 'Just Added', + 'id': 'justadded', + }, + 'playlist_mincount': 80, + }, { + 'url': 'http://trailers.apple.com/#section=exclusive', + 'info_dict': { + 'title': 'Exclusive', + 'id': 'exclusive', + }, + 'playlist_mincount': 80, + }, { + 'url': 'http://trailers.apple.com/#section=justhd', + 'info_dict': { + 'title': 'Just HD', + 'id': 'justhd', + }, + 'playlist_mincount': 80, + }, { + 'url': 'http://trailers.apple.com/#section=mostpopular', + 'info_dict': { + 'title': 'Most Popular', + 'id': 'mostpopular', + }, + 'playlist_mincount': 30, + }, { + 'url': 'http://trailers.apple.com/#section=moviestudios', + 'info_dict': { + 'title': 'Movie Studios', + 'id': 'moviestudios', + }, + 
'playlist_mincount': 80, + }] + + def _real_extract(self, url): + section = self._match_id(url) + section_data = self._download_json( + 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'], + section) + entries = [ + self.url_result('http://trailers.apple.com' + e['location']) + for e in section_data] + return self.playlist_result(entries, section, self._SECTIONS[section]['title']) diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py new file mode 100644 index 000000000..c79c58e82 --- /dev/null +++ b/youtube_dl/extractor/archiveorg.py @@ -0,0 +1,65 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, + clean_html, +) + + +class ArchiveOrgIE(InfoExtractor): + IE_NAME = 'archive.org' + IE_DESC = 'archive.org videos' + _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$' + _TESTS = [{ + 'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect', + 'md5': '8af1d4cf447933ed3c7f4871162602db', + 'info_dict': { + 'id': 'XD300-23_68HighlightsAResearchCntAugHumanIntellect', + 'ext': 'ogg', + 'title': '1968 Demo - FJCC Conference Presentation Reel #1', + 'description': 'md5:da45c349df039f1cc8075268eb1b5c25', + 'upload_date': '19681210', + 'uploader': 'SRI International' + } + }, { + 'url': 'https://archive.org/details/Cops1922', + 'md5': '0869000b4ce265e8ca62738b336b268a', + 'info_dict': { + 'id': 'Cops1922', + 'ext': 'mp4', + 'title': 'Buster Keaton\'s "Cops" (1922)', + 'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6', + } + }, { + 'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://archive.org/embed/' + video_id, video_id) + jwplayer_playlist = self._parse_json(self._search_regex( + r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)", + webpage, 'jwplayer playlist'), video_id) + info = self._parse_jwplayer_data( + {'playlist': jwplayer_playlist}, video_id, base_url=url) + + def get_optional(metadata, field): + return metadata.get(field, [None])[0] + + metadata = self._download_json( + 'http://archive.org/details/' + video_id, video_id, query={ + 'output': 'json', + })['metadata'] + info.update({ + 'title': get_optional(metadata, 'title') or info.get('title'), + 'description': clean_html(get_optional(metadata, 'description')), + }) + if info.get('_type') != 'playlist': + info.update({ + 'uploader': get_optional(metadata, 'creator'), + 'upload_date': unified_strdate(get_optional(metadata, 'date')), + }) + return info diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py new file mode 100644 index 000000000..8adae4644 --- /dev/null +++ b/youtube_dl/extractor/ard.py @@ -0,0 +1,400 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .generic import GenericIE +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + parse_duration, + qualities, + str_or_none, + try_get, + unified_strdate, + unified_timestamp, + update_url_query, + url_or_none, + xpath_text, +) +from ..compat import compat_etree_fromstring + + +class ARDMediathekIE(InfoExtractor): + IE_NAME = 'ARD:mediathek' + _VALID_URL = 
r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' + + _TESTS = [{ + # available till 26.07.2022 + 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822', + 'info_dict': { + 'id': '44726822', + 'ext': 'mp4', + 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?', + 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5', + 'duration': 1740, + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872', + 'only_matching': True, + }, { + # audio + 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', + 'only_matching': True, + }, { + 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', + 'only_matching': True, + }, { + # audio + 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158', + 'only_matching': True, + }, { + 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url) + + def _extract_media_info(self, media_info_url, webpage, video_id): + media_info = self._download_json( + media_info_url, video_id, 'Downloading media JSON') + + formats = self._extract_formats(media_info, video_id) + + if not formats: + if '"fsk"' in webpage: + raise ExtractorError( + 'This video is only available after 20:00', expected=True) + elif media_info.get('_geoblocked'): + raise ExtractorError('This video is not available due to geo restriction', expected=True) + + self._sort_formats(formats) + + duration = int_or_none(media_info.get('_duration')) + thumbnail = media_info.get('_previewImage') + is_live = media_info.get('_isLive') is True + + subtitles = {} + subtitle_url = media_info.get('_subtitleUrl') + if subtitle_url: + subtitles['de'] = [{ + 'ext': 'ttml', + 'url': subtitle_url, + }] + + return { + 'id': video_id, + 'duration': duration, + 'thumbnail': thumbnail, + 'is_live': is_live, + 'formats': formats, + 'subtitles': subtitles, + } + + def _extract_formats(self, media_info, video_id): + type_ = media_info.get('_type') + media_array = media_info.get('_mediaArray', []) + formats = [] + for num, media in enumerate(media_array): + for stream in media.get('_mediaStreamArray', []): + stream_urls = stream.get('_stream') + if not stream_urls: + continue + if not isinstance(stream_urls, list): + stream_urls = [stream_urls] + quality = stream.get('_quality') + server = stream.get('_server') + for stream_url in stream_urls: + if not url_or_none(stream_url): + continue + ext = determine_ext(stream_url) + if quality != 'auto' and ext in ('f4m', 'm3u8'): + continue + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + update_url_query(stream_url, { + 'hdcore': '3.1.1', + 'plugin': 'aasp-3.1.1.69.124' + }), + video_id, f4m_id='hds', fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, 'mp4', 
m3u8_id='hls', fatal=False)) + else: + if server and server.startswith('rtmp'): + f = { + 'url': server, + 'play_path': stream_url, + 'format_id': 'a%s-rtmp-%s' % (num, quality), + } + else: + f = { + 'url': stream_url, + 'format_id': 'a%s-%s-%s' % (num, ext, quality) + } + m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url) + if m: + f.update({ + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + if type_ == 'audio': + f['vcodec'] = 'none' + formats.append(f) + return formats + + def _real_extract(self, url): + # determine video id from url + m = re.match(self._VALID_URL, url) + + document_id = None + + numid = re.search(r'documentId=([0-9]+)', url) + if numid: + document_id = video_id = numid.group(1) + else: + video_id = m.group('video_id') + + webpage = self._download_webpage(url, video_id) + + ERRORS = ( + ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'), + ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<', + 'Video %s is no longer available'), + ) + + for pattern, message in ERRORS: + if pattern in webpage: + raise ExtractorError(message % video_id, expected=True) + + if re.search(r'[\?&]rss($|[=&])', url): + doc = compat_etree_fromstring(webpage.encode('utf-8')) + if doc.tag == 'rss': + return GenericIE()._extract_rss(url, video_id, doc) + + title = self._html_search_regex( + [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>', + r'<meta name="dcterms\.title" content="(.*?)"/>', + r'<h4 class="headline">(.*?)</h4>', + r'<title[^>]*>(.*?)'], + webpage, 'title') + description = self._html_search_meta( + 'dcterms.abstract', webpage, 'description', default=None) + if description is None: + description = self._html_search_meta( + 'description', webpage, 'meta description', default=None) + if description is None: + description = self._html_search_regex( + r'(.+?)

    ', + webpage, 'teaser text', default=None) + + # Thumbnail is sometimes not present. + # It is in the mobile version, but that seems to use a different URL + # structure altogether. + thumbnail = self._og_search_thumbnail(webpage, default=None) + + media_streams = re.findall(r'''(?x) + mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s* + "([^"]+)"''', webpage) + + if media_streams: + QUALITIES = qualities(['lo', 'hi', 'hq']) + formats = [] + for furl in set(media_streams): + if furl.endswith('.f4m'): + fid = 'f4m' + else: + fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl) + fid = fid_m.group(1) if fid_m else None + formats.append({ + 'quality': QUALITIES(fid), + 'format_id': fid, + 'url': furl, + }) + self._sort_formats(formats) + info = { + 'formats': formats, + } + else: # request JSON file + if not document_id: + video_id = self._search_regex( + r'/play/(?:config|media)/(\d+)', webpage, 'media id') + info = self._extract_media_info( + 'http://www.ardmediathek.de/play/media/%s' % video_id, + webpage, video_id) + + info.update({ + 'id': video_id, + 'title': self._live_title(title) if info.get('is_live') else title, + 'description': description, + 'thumbnail': thumbnail, + }) + + return info + + +class ARDIE(InfoExtractor): + _VALID_URL = r'(?Phttps?://(www\.)?daserste\.de/[^?#]+/videos/(?P[^/?#]+)-(?P[0-9]+))\.html' + _TESTS = [{ + # available till 14.02.2019 + 'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html', + 'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49', + 'info_dict': { + 'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video', + 'id': '102', + 'ext': 'mp4', + 'duration': 4435.0, + 'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?', + 'upload_date': '20180214', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, { + 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + player_url = mobj.group('mainurl') + '~playerXml.xml' + doc = self._download_xml(player_url, display_id) + video_node = doc.find('./video') + upload_date = unified_strdate(xpath_text( + video_node, './broadcastDate')) + thumbnail = xpath_text(video_node, './/teaserImage//variant/url') + + formats = [] + for a in video_node.findall('.//asset'): + f = { + 'format_id': a.attrib['type'], + 'width': int_or_none(a.find('./frameWidth').text), + 'height': int_or_none(a.find('./frameHeight').text), + 'vbr': int_or_none(a.find('./bitrateVideo').text), + 'abr': int_or_none(a.find('./bitrateAudio').text), + 'vcodec': a.find('./codecVideo').text, + 'tbr': int_or_none(a.find('./totalBitrate').text), + } + if a.find('./serverPrefix').text: + f['url'] = a.find('./serverPrefix').text + f['playpath'] = a.find('./fileName').text + else: + f['url'] = a.find('./fileName').text + formats.append(f) + self._sort_formats(formats) + + return { + 'id': mobj.group('id'), + 'formats': formats, + 'display_id': display_id, + 'title': video_node.find('./title').text, + 'duration': parse_duration(video_node.find('./duration').text), + 'upload_date': upload_date, + 'thumbnail': thumbnail, + } + + +class ARDBetaMediathekIE(InfoExtractor): + _VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P[a-zA-Z0-9]+)(?:/(?P[^/?#]+))?' 
+ _TESTS = [{ + 'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita', + 'md5': '2d02d996156ea3c397cfc5036b5d7f8f', + 'info_dict': { + 'display_id': 'die-robuste-roswita', + 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', + 'title': 'Tatort: Die robuste Roswita', + 'description': r're:^Der Mord.*trüber ist als die Ilm.', + 'duration': 5316, + 'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard', + 'upload_date': '20180826', + 'ext': 'mp4', + }, + }, { + 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/', + 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('video_id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json') + data = self._parse_json(data_json, display_id) + + res = { + 'id': video_id, + 'display_id': display_id, + } + formats = [] + subtitles = {} + geoblocked = False + for widget in data.values(): + if widget.get('_geoblocked') is True: + geoblocked = True + if '_duration' in widget: + res['duration'] = int_or_none(widget['_duration']) + if 'clipTitle' in widget: + res['title'] = widget['clipTitle'] + if '_previewImage' in widget: + res['thumbnail'] = widget['_previewImage'] + if 'broadcastedOn' in widget: + res['timestamp'] = unified_timestamp(widget['broadcastedOn']) + if 'synopsis' in widget: + res['description'] = widget['synopsis'] + subtitle_url = url_or_none(widget.get('_subtitleUrl')) + if subtitle_url: + subtitles.setdefault('de', []).append({ + 'ext': 'ttml', + 'url': subtitle_url, + }) + if '_quality' in widget: + format_url = url_or_none(try_get( + widget, lambda x: x['_stream']['json'][0])) + if not format_url: + continue + ext = determine_ext(format_url) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url + '?hdcore=3.11.0', + video_id, f4m_id='hds', fatal=False)) + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id='hls', + fatal=False)) + else: + # HTTP formats are not available when geoblocked is True, + # other formats are fine though + if geoblocked: + continue + quality = str_or_none(widget.get('_quality')) + formats.append({ + 'format_id': ('http-' + quality) if quality else 'http', + 'url': format_url, + 'preference': 10, # Plain HTTP, that's nice + }) + + if not formats and geoblocked: + self.raise_geo_restricted( + msg='This video is not available due to geoblocking', + countries=['DE']) + + self._sort_formats(formats) + res.update({ + 'subtitles': subtitles, + 'formats': formats, + }) + + return res diff --git a/youtube_dl/extractor/arkena.py b/youtube_dl/extractor/arkena.py new file mode 100644 index 000000000..854f58767 --- /dev/null +++ b/youtube_dl/extractor/arkena.py @@ -0,0 +1,133 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + determine_ext, + ExtractorError, + float_or_none, + int_or_none, + mimetype2ext, + parse_iso8601, + strip_jsonp, +) + + +class ArkenaIE(InfoExtractor): + _VALID_URL = 
r'''(?x) + https?:// + (?: + video\.arkena\.com/play2/embed/player\?| + play\.arkena\.com/(?:config|embed)/avp/v\d/player/media/(?P[^/]+)/[^/]+/(?P\d+) + ) + ''' + _TESTS = [{ + 'url': 'https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411', + 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', + 'info_dict': { + 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe', + 'ext': 'mp4', + 'title': 'Big Buck Bunny', + 'description': 'Royalty free test video', + 'timestamp': 1432816365, + 'upload_date': '20150528', + 'is_live': False, + }, + }, { + 'url': 'https://play.arkena.com/config/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411/?callbackMethod=jQuery1111023664739129262213_1469227693893', + 'only_matching': True, + }, { + 'url': 'http://play.arkena.com/config/avp/v1/player/media/327336/darkmatter/131064/?callbackMethod=jQuery1111002221189684892677_1469227595972', + 'only_matching': True, + }, { + 'url': 'http://play.arkena.com/embed/avp/v1/player/media/327336/darkmatter/131064/', + 'only_matching': True, + }, { + 'url': 'http://video.arkena.com/play2/embed/player?accountId=472718&mediaId=35763b3b-00090078-bf604299&pageStyling=styled', + 'only_matching': True, + }] + + @staticmethod + def _extract_url(webpage): + # See https://support.arkena.com/display/PLAY/Ways+to+embed+your+video + mobj = re.search( + r']+src=(["\'])(?P(?:https?:)?//play\.arkena\.com/embed/avp/.+?)\1', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + account_id = mobj.group('account_id') + + # Handle http://video.arkena.com/play2/embed/player URL + if not video_id: + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + video_id = qs.get('mediaId', [None])[0] + account_id = qs.get('accountId', [None])[0] + if not video_id or not account_id: + raise ExtractorError('Invalid URL', expected=True) + + playlist = self._download_json( + 'https://play.arkena.com/config/avp/v2/player/media/%s/0/%s/?callbackMethod=_' + % (video_id, account_id), + video_id, transform_source=strip_jsonp)['Playlist'][0] + + media_info = playlist['MediaInfo'] + title = media_info['Title'] + media_files = playlist['MediaFiles'] + + is_live = False + formats = [] + for kind_case, kind_formats in media_files.items(): + kind = kind_case.lower() + for f in kind_formats: + f_url = f.get('Url') + if not f_url: + continue + is_live = f.get('Live') == 'true' + exts = (mimetype2ext(f.get('Type')), determine_ext(f_url, None)) + if kind == 'm3u8' or 'm3u8' in exts: + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', 'm3u8_native', + m3u8_id=kind, fatal=False, live=is_live)) + elif kind == 'flash' or 'f4m' in exts: + formats.extend(self._extract_f4m_formats( + f_url, video_id, f4m_id=kind, fatal=False)) + elif kind == 'dash' or 'mpd' in exts: + formats.extend(self._extract_mpd_formats( + f_url, video_id, mpd_id=kind, fatal=False)) + elif kind == 'silverlight': + # TODO: process when ism is supported (see + # https://github.com/ytdl-org/youtube-dl/issues/8118) + continue + else: + tbr = float_or_none(f.get('Bitrate'), 1000) + formats.append({ + 'url': f_url, + 'format_id': '%s-%d' % (kind, tbr) if tbr else kind, + 'tbr': tbr, + }) + self._sort_formats(formats) + + description = media_info.get('Description') + video_id = media_info.get('VideoId') or video_id + timestamp = parse_iso8601(media_info.get('PublishDate')) + thumbnails = [{ + 'url': thumbnail['Url'], + 'width': 
int_or_none(thumbnail.get('Size')), + } for thumbnail in (media_info.get('Poster') or []) if thumbnail.get('Url')] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'is_live': is_live, + 'thumbnails': thumbnails, + 'formats': formats, + } diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py new file mode 100644 index 000000000..2bd3bfe8a --- /dev/null +++ b/youtube_dl/extractor/arte.py @@ -0,0 +1,201 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + try_get, + unified_strdate, +) + +# There are different sources of video in arte.tv, the extraction process +# is different for each one. The videos usually expire in 7 days, so we can't +# add tests. + + +class ArteTVBaseIE(InfoExtractor): + def _extract_from_json_url(self, json_url, video_id, lang, title=None): + info = self._download_json(json_url, video_id) + player_info = info['videoJsonPlayer'] + + vsr = try_get(player_info, lambda x: x['VSR'], dict) + if not vsr: + error = None + if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error': + error = try_get( + player_info, lambda x: x['custom_msg']['msg'], compat_str) + if not error: + error = 'Video %s is not available' % player_info.get('VID') or video_id + raise ExtractorError(error, expected=True) + + upload_date_str = player_info.get('shootingDate') + if not upload_date_str: + upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0] + + title = (player_info.get('VTI') or title or player_info['VID']).strip() + subtitle = player_info.get('VSU', '').strip() + if subtitle: + title += ' - %s' % subtitle + + info_dict = { + 'id': player_info['VID'], + 'title': title, + 'description': player_info.get('VDE'), + 'upload_date': unified_strdate(upload_date_str), + 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), + } + qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) + + LANGS = { + 'fr': 'F', + 'de': 'A', + 'en': 'E[ANG]', + 'es': 'E[ESP]', + 'it': 'E[ITA]', + 'pl': 'E[POL]', + } + + langcode = LANGS.get(lang, lang) + + formats = [] + for format_id, format_dict in vsr.items(): + f = dict(format_dict) + versionCode = f.get('versionCode') + l = re.escape(langcode) + + # Language preference from most to least priority + # Reference: section 6.8 of + # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf + PREFERENCES = ( + # original version in requested language, without subtitles + r'VO{0}$'.format(l), + # original version in requested language, with partial subtitles in requested language + r'VO{0}-ST{0}$'.format(l), + # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language + r'VO{0}-STM{0}$'.format(l), + # non-original (dubbed) version in requested language, without subtitles + r'V{0}$'.format(l), + # non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language + r'V{0}-ST{0}$'.format(l), + # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language + r'V{0}-STM{0}$'.format(l), + # original version in requested language, with partial subtitles in different language + r'VO{0}-ST(?!{0}).+?$'.format(l), + # original version in requested language, with subtitles for the deaf 
and hard-of-hearing in different language + r'VO{0}-STM(?!{0}).+?$'.format(l), + # original version in different language, with partial subtitles in requested language + r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l), + # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language + r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l), + # original version in different language, without subtitles + r'VO(?:(?!{0}))?$'.format(l), + # original version in different language, with partial subtitles in different language + r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l), + # original version in different language, with subtitles for the deaf and hard-of-hearing in different language + r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l), + ) + + for pref, p in enumerate(PREFERENCES): + if re.match(p, versionCode): + lang_pref = len(PREFERENCES) - pref + break + else: + lang_pref = -1 + + format = { + 'format_id': format_id, + 'preference': -10 if f.get('videoFormat') == 'M3U8' else None, + 'language_preference': lang_pref, + 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')), + 'width': int_or_none(f.get('width')), + 'height': int_or_none(f.get('height')), + 'tbr': int_or_none(f.get('bitrate')), + 'quality': qfunc(f.get('quality')), + } + + if f.get('mediaType') == 'rtmp': + format['url'] = f['streamer'] + format['play_path'] = 'mp4:' + f['url'] + format['ext'] = 'flv' + else: + format['url'] = f['url'] + + formats.append(format) + + self._check_formats(formats, video_id) + self._sort_formats(formats) + + info_dict['formats'] = formats + return info_dict + + +class ArteTVPlus7IE(ArteTVBaseIE): + IE_NAME = 'arte.tv:+7' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?Pfr|de|en|es|it|pl)/videos/(?P\d{6}-\d{3}-[AF])' + + _TESTS = [{ + 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', + 'info_dict': { + 'id': '088501-000-A', + 'ext': 'mp4', + 'title': 'Mexico: Stealing Petrol to Survive', + 'upload_date': '20190628', + }, + }] + + def _real_extract(self, url): + lang, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_from_json_url( + 'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id), + video_id, lang) + + +class ArteTVEmbedIE(ArteTVPlus7IE): + IE_NAME = 'arte.tv:embed' + _VALID_URL = r'''(?x) + https://www\.arte\.tv + /player/v3/index\.php\?json_url= + (?P + https?://api\.arte\.tv/api/player/v1/config/ + (?P[^/]+)/(?P\d{6}-\d{3}-[AF]) + ) + ''' + + _TESTS = [] + + def _real_extract(self, url): + json_url, lang, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_from_json_url(json_url, video_id, lang) + + +class ArteTVPlaylistIE(ArteTVBaseIE): + IE_NAME = 'arte.tv:playlist' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?Pfr|de|en|es|it|pl)/videos/(?PRC-\d{6})' + + _TESTS = [{ + 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', + 'info_dict': { + 'id': 'RC-016954', + 'title': 'Earn a Living', + 'description': 'md5:d322c55011514b3a7241f7fb80d494c2', + }, + 'playlist_mincount': 6, + }] + + def _real_extract(self, url): + lang, playlist_id = re.match(self._VALID_URL, url).groups() + collection = self._download_json( + 'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' + % (lang, playlist_id), playlist_id) + title = collection.get('title') + description = collection.get('shortDescription') or collection.get('teaserText') + entries = [ + self._extract_from_json_url( + video['jsonUrl'], video.get('programId') or playlist_id, lang) + 
for video in collection['videos'] if video.get('jsonUrl')] + return self.playlist_result(entries, playlist_id, title, description) diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dl/extractor/asiancrush.py new file mode 100644 index 000000000..6d71c5ad5 --- /dev/null +++ b/youtube_dl/extractor/asiancrush.py @@ -0,0 +1,113 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE +from ..utils import ( + extract_attributes, + remove_end, +) + + +class AsianCrushIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/video/(?:[^/]+/)?0+(?P\d+)v\b' + _TESTS = [{ + 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/', + 'md5': 'c3b740e48d0ba002a42c0b72857beae6', + 'info_dict': { + 'id': '1_y4tmjm5r', + 'ext': 'mp4', + 'title': 'Women Who Flirt', + 'description': 'md5:3db14e9186197857e7063522cb89a805', + 'timestamp': 1496936429, + 'upload_date': '20170608', + 'uploader_id': 'craig@crifkin.com', + }, + }, { + 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + entry_id, partner_id, title = [None] * 3 + + vars = self._parse_json( + self._search_regex( + r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars', + default='{}'), video_id, fatal=False) + if vars: + entry_id = vars.get('entry_id') + partner_id = vars.get('partner_id') + title = vars.get('vid_label') + + if not entry_id: + entry_id = self._search_regex( + r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id') + + player = self._download_webpage( + 'https://api.asiancrush.com/embeddedVideoPlayer', video_id, + query={'id': entry_id}) + + kaltura_id = self._search_regex( + r'entry_id["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', player, + 'kaltura id', group='id') + + if not partner_id: + partner_id = self._search_regex( + r'/p(?:artner_id)?/(\d+)', player, 'partner id', + default='513551') + + return self.url_result( + 'kaltura:%s:%s' % (partner_id, kaltura_id), + ie=KalturaIE.ie_key(), video_id=kaltura_id, + video_title=title) + + +class AsianCrushPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?asiancrush\.com/series/0+(?P\d+)s\b' + _TEST = { + 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/', + 'info_dict': { + 'id': '12481', + 'title': 'Scholar Who Walks the Night', + 'description': 'md5:7addd7c5132a09fd4741152d96cce886', + }, + 'playlist_count': 20, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [] + + for mobj in re.finditer( + r']+href=(["\'])(?P%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL, + webpage): + attrs = extract_attributes(mobj.group(0)) + if attrs.get('class') == 'clearfix': + entries.append(self.url_result( + mobj.group('url'), ie=AsianCrushIE.ie_key())) + + title = remove_end( + self._html_search_regex( + r'(?s)]\bid=["\']movieTitle[^>]+>(.+?)', webpage, + 'title', default=None) or self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', + default=None) or self._search_regex( + r'([^<]+)', webpage, 'title', fatal=False), + ' | AsianCrush') + + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'twitter:description', webpage, 'description', fatal=False) + + return self.playlist_result(entries, 
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
new file mode 100644
index 000000000..ae1c09427
--- /dev/null
+++ b/youtube_dl/extractor/atresplayer.py
@@ -0,0 +1,202 @@
+from __future__ import unicode_literals
+
+import time
+import hmac
+import hashlib
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    sanitized_Request,
+    urlencode_postdata,
+    xpath_text,
+)
+
+
+class AtresPlayerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
+    _NETRC_MACHINE = 'atresplayer'
+    _TESTS = [
+        {
+            'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
+            'md5': 'efd56753cda1bb64df52a3074f62e38a',
+            'info_dict': {
+                'id': 'capitulo-10-especial-solidario-nochebuena',
+                'ext': 'mp4',
+                'title': 'Especial Solidario de Nochebuena',
+                'description': 'md5:e2d52ff12214fa937107d21064075bf1',
+                'duration': 5527.6,
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+            'skip': 'This video is only available for registered users'
+        },
+        {
+            'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
+            'md5': '6e52cbb513c405e403dbacb7aacf8747',
+            'info_dict': {
+                'id': 'capitulo-112-david-bustamante',
+                'ext': 'flv',
+                'title': 'David Bustamante',
+                'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
+                'duration': 1439.0,
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
+            'only_matching': True,
+        },
+    ]
+
+    _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
+    _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
+    _TIMESTAMP_SHIFT = 30000
+
+    _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
+    _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
+    _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
+    _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
+
+    _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
+
+    _ERRORS = {
+        'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
+        'DELETED': 'This video has expired and is no longer available for online streaming.',
+        'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
+        # 'PREMIUM': 'PREMIUM',
+    }
+
+    def _real_initialize(self):
+        self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if username is None:
+            return
+
+        login_form = {
+            'j_username': username,
+            'j_password': password,
+        }
+
+        request = sanitized_Request(
+            self._LOGIN_URL, urlencode_postdata(login_form))
+        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+        response = self._download_webpage(
+            request, None, 'Logging in')
+
+        error = self._html_search_regex(
+            r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
+            response, 'error', default=None)
+        if error:
+            raise ExtractorError(
+                'Unable to login: %s' % error, expected=True)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        episode_id = self._search_regex(
+            r'episode="([^"]+)"', webpage, 'episode id')
+
+        request = sanitized_Request(
+            self._PLAYER_URL_TEMPLATE % episode_id,
+            headers={'User-Agent': self._USER_AGENT})
+        player = self._download_json(request, episode_id, 'Downloading player JSON')
+
+        episode_type = player.get('typeOfEpisode')
+        error_message = self._ERRORS.get(episode_type)
+        if error_message:
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
+
+        formats = []
+        video_url = player.get('urlVideo')
+        if video_url:
+            format_info = {
+                'url': video_url,
+                'format_id': 'http',
+            }
+            mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
+            if mobj:
+                format_info.update({
+                    'width': int_or_none(mobj.group('width')),
+                    'height': int_or_none(mobj.group('height')),
+                    'tbr': int_or_none(mobj.group('bitrate')),
+                })
+            formats.append(format_info)
+
+        timestamp = int_or_none(self._download_webpage(
+            self._TIME_API_URL,
+            video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
+        timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
+        token = hmac.new(
+            self._MAGIC.encode('ascii'),
+            (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
+        ).hexdigest()
+
+        request = sanitized_Request(
+            self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
+            headers={'User-Agent': self._USER_AGENT})
+
+        fmt_json = self._download_json(
+            request, video_id, 'Downloading windows video JSON')
+
+        result = fmt_json.get('resultDes')
+        if result.lower() != 'ok':
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, result), expected=True)
+
+        for format_id, video_url in fmt_json['resultObject'].items():
+            if format_id == 'token' or not video_url.startswith('http'):
+                continue
+            if 'geodeswowsmpra3player' in video_url:
+                # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
+                # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
+                # these videos are protected by DRM; the f4m downloader doesn't support them
+                continue
+            video_url_hd = video_url.replace('free_es', 'es')
+            formats.extend(self._extract_f4m_formats(
+                video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
+                fatal=False))
+            formats.extend(self._extract_mpd_formats(
+                video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
+                fatal=False))
+        self._sort_formats(formats)
+
+        path_data = player.get('pathData')
+
+        episode = self._download_xml(
+            self._EPISODE_URL_TEMPLATE % path_data, video_id,
+            'Downloading episode XML')
+
+        duration = float_or_none(xpath_text(
+            episode, './media/asset/info/technical/contentDuration', 'duration'))
+
+        art = episode.find('./media/asset/info/art')
+        title = xpath_text(art, './name', 'title')
+        description = xpath_text(art, './description', 'description')
+        thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
+
+        subtitles = {}
+        subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
+        if subtitle_url:
+            subtitles['es'] = [{
+                'ext': 'srt',
+                'url': subtitle_url,
+            }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'formats': formats,
+            'subtitles': subtitles,
+        }
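The token sent to the urlVideo API above is derived from a server-synchronised clock: the extractor fetches the current time, shifts it by `_TIMESTAMP_SHIFT`, appends it to the episode id and HMAC-MD5s the result with the `_MAGIC` key. A standalone sketch of that computation (the magic string and episode id below are placeholders, not values the site accepts):

import hashlib
import hmac
import time

def atres_token(episode_id, magic, shift=30000):
    # Hex HMAC-MD5 over episode_id + (timestamp + shift); the extractor plugs
    # both the shifted timestamp and the token into _URL_VIDEO_TEMPLATE.
    timestamp_shifted = str(int(time.time()) + shift)
    token = hmac.new(
        magic.encode('ascii'),
        (episode_id + timestamp_shifted).encode('utf-8'),
        hashlib.md5).hexdigest()
    return timestamp_shifted, token

ts, token = atres_token('capitulo-10', 'not-the-real-magic')
assert len(token) == 32  # MD5 hex digest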
diff --git a/youtube_dl/extractor/atttechchannel.py b/youtube_dl/extractor/atttechchannel.py
new file mode 100644
index 000000000..8f93fb353
--- /dev/null
+++ b/youtube_dl/extractor/atttechchannel.py
@@ -0,0 +1,55 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unified_strdate
+
+
+class ATTTechChannelIE(InfoExtractor):
+    _VALID_URL = r'https?://techchannel\.att\.com/play-video\.cfm/([^/]+/)*(?P<id>.+)'
+    _TEST = {
+        'url': 'http://techchannel.att.com/play-video.cfm/2014/1/27/ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
+        'info_dict': {
+            'id': '11316',
+            'display_id': 'ATT-Archives-The-UNIX-System-Making-Computers-Easier-to-Use',
+            'ext': 'flv',
+            'title': 'AT&T Archives : The UNIX System: Making Computers Easier to Use',
+            'description': 'A 1982 film about UNIX is the foundation for software in use around Bell Labs and AT&T.',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'upload_date': '20140127',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_url = self._search_regex(
+            r"url\s*:\s*'(rtmp://[^']+)'",
+            webpage, 'video URL')
+
+        video_id = self._search_regex(
+            r'mediaid\s*=\s*(\d+)',
+            webpage, 'video id', fatal=False)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        upload_date = unified_strdate(self._search_regex(
+            r'[Rr]elease\s+date:\s*(\d{1,2}/\d{1,2}/\d{4})',
+            webpage, 'upload date', fatal=False), False)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'ext': 'flv',
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+        }
diff --git a/youtube_dl/extractor/atvat.py b/youtube_dl/extractor/atvat.py
new file mode 100644
index 000000000..95e572d70
--- /dev/null
+++ b/youtube_dl/extractor/atvat.py
@@ -0,0 +1,75 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class ATVAtIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?atv\.at/(?:[^/]+/){2}(?P<id>[dv]\d+)'
+    _TESTS = [{
+        'url': 'http://atv.at/aktuell/di-210317-2005-uhr/v1698449/',
+        'md5': 'c3b6b975fb3150fc628572939df205f2',
+        'info_dict': {
+            'id': '1698447',
+            'ext': 'mp4',
+            'title': 'DI, 21.03.17 | 20:05 Uhr 1/1',
+        }
+    }, {
+        'url': 'http://atv.at/aktuell/meinrad-knapp/d8416/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+        video_data = self._parse_json(unescapeHTML(self._search_regex(
+            [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
+             r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
+            webpage, 'player data', group='json')),
+            display_id)['config']['initial_video']
+
+        video_id = video_data['id']
+        video_title = video_data['title']
+
+        parts = []
+        for part in video_data.get('parts', []):
+            part_id = part['id']
+            part_title = part['title']
+
+            formats = []
+            for source in part.get('sources', []):
+                source_url = source.get('src')
+                if not source_url:
+                    continue
+                ext = determine_ext(source_url)
+                if ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        source_url, part_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+                else:
+                    formats.append({
+                        'format_id': source.get('delivery'),
+                        'url': source_url,
+                    })
+            self._sort_formats(formats)
+
+            parts.append({
+                'id': part_id,
+                'title': part_title,
+                'thumbnail': part.get('preview_image_url'),
+                'duration': int_or_none(part.get('duration')),
+                'is_live': part.get('is_livestream'),
+                'formats': formats,
+            })
+
+        return {
+            '_type': 'multi_video',
+            'id': video_id,
+            'title': video_title,
+            'entries': parts,
+        }
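Because ATV splits a broadcast into parts that each carry their own format list, the extractor returns a `multi_video` result: unlike a plain playlist of URLs, the entries are fully resolved info dicts that are downloaded back to back as one video. A rough sketch of that shape (part ids and titles here are hypothetical, and the format lists are left empty):

# Sketch of ATVAtIE's return value for a two-part broadcast.
result = {
    '_type': 'multi_video',        # parts are played/downloaded back to back
    'id': '1698447',               # id of the broadcast as a whole
    'title': 'DI, 21.03.17 | 20:05 Uhr',
    'entries': [
        {'id': '1698447-1', 'title': 'DI, 21.03.17 | 20:05 Uhr 1/2', 'formats': []},
        {'id': '1698447-2', 'title': 'DI, 21.03.17 | 20:05 Uhr 2/2', 'formats': []},
    ],
}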
diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dl/extractor/audimedia.py
new file mode 100644
index 000000000..6bd48ef15
--- /dev/null
+++ b/youtube_dl/extractor/audimedia.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    parse_iso8601,
+)
+
+
+class AudiMediaIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?audi-mediacenter\.com/(?:en|de)/audimediatv/(?:video/)?(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.audi-mediacenter.com/en/audimediatv/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-1467',
+        'md5': '79a8b71c46d49042609795ab59779b66',
+        'info_dict': {
+            'id': '1565',
+            'ext': 'mp4',
+            'title': '60 Seconds of Audi Sport 104/2015 - WEC Bahrain, Rookie Test',
+            'description': 'md5:60e5d30a78ced725f7b8d34370762941',
+            'upload_date': '20151124',
+            'timestamp': 1448354940,
+            'duration': 74022,
+            'view_count': int,
+        }
+    }, {
+        'url': 'https://www.audi-mediacenter.com/en/audimediatv/video/60-seconds-of-audi-sport-104-2015-wec-bahrain-rookie-test-2991',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        raw_payload = self._search_regex([
+            r'class="amtv-embed"[^>]+id="([0-9a-z-]+)"',
+            r'id="([0-9a-z-]+)"[^>]+class="amtv-embed"',
+            r'class=\\"amtv-embed\\"[^>]+id=\\"([0-9a-z-]+)\\"',
+            r'id=\\"([0-9a-z-]+)\\"[^>]+class=\\"amtv-embed\\"',
+            r'id=(?:\\)?"(amtve-[a-z]-\d+-[a-z]{2})',
+        ], webpage, 'raw payload')
+        _, stage_mode, video_id, _ = raw_payload.split('-')
+
+        # TODO: handle s and e stage_mode (live streams and ended live streams)
+        if stage_mode not in ('s', 'e'):
+            video_data = self._download_json(
+                'https://www.audimedia.tv/api/video/v1/videos/' + video_id,
+                video_id, query={
+                    'embed[]': ['video_versions', 'thumbnail_image'],
+                })['results']
+            formats = []
+
+            stream_url_hls = video_data.get('stream_url_hls')
+            if stream_url_hls:
+                formats.extend(self._extract_m3u8_formats(
+                    stream_url_hls, video_id, 'mp4',
+                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+
+            stream_url_hds = video_data.get('stream_url_hds')
+            if stream_url_hds:
+                formats.extend(self._extract_f4m_formats(
+                    stream_url_hds + '?hdcore=3.4.0',
+                    video_id, f4m_id='hds', fatal=False))
+
+            for video_version in video_data.get('video_versions', []):
+                video_version_url = video_version.get('download_url') or video_version.get('stream_url')
+                if not video_version_url:
+                    continue
+                f = {
+                    'url': video_version_url,
+                    'width': int_or_none(video_version.get('width')),
+                    'height': int_or_none(video_version.get('height')),
+                    'abr': int_or_none(video_version.get('audio_bitrate')),
+                    'vbr': int_or_none(video_version.get('video_bitrate')),
+                }
+                bitrate = self._search_regex(r'(\d+)k', video_version_url, 'bitrate', default=None)
+                if bitrate:
+                    f.update({
+                        'format_id': 'http-%s' % bitrate,
+                    })
+                formats.append(f)
+            self._sort_formats(formats)
+
+            return {
+                'id': video_id,
+                'title': video_data['title'],
+                'description': video_data.get('subtitle'),
+                'thumbnail': video_data.get('thumbnail_image', {}).get('file'),
+                'timestamp': parse_iso8601(video_data.get('publication_date')),
+                'duration': int_or_none(video_data.get('duration')),
+                'view_count': int_or_none(video_data.get('view_count')),
+                'formats': formats,
+            }
diff --git a/youtube_dl/extractor/audioboom.py
b/youtube_dl/extractor/audioboom.py new file mode 100644 index 000000000..393f381c6 --- /dev/null +++ b/youtube_dl/extractor/audioboom.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import float_or_none + + +class AudioBoomIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0', + 'md5': '63a8d73a055c6ed0f1e51921a10a5a76', + 'info_dict': { + 'id': '4279833', + 'ext': 'mp3', + 'title': '3/09/2016 Czaban Hour 3', + 'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans', + 'duration': 2245.72, + 'uploader': 'SB Nation A.M.', + 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio', + } + }, { + 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + clip = None + + clip_store = self._parse_json( + self._search_regex( + r'data-new-clip-store=(["\'])(?P{.*?"clipId"\s*:\s*%s.*?})\1' % video_id, + webpage, 'clip store', default='{}', group='json'), + video_id, fatal=False) + if clip_store: + clips = clip_store.get('clips') + if clips and isinstance(clips, list) and isinstance(clips[0], dict): + clip = clips[0] + + def from_clip(field): + if clip: + return clip.get(field) + + audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property( + 'audio', webpage, 'audio url') + title = from_clip('title') or self._og_search_title(webpage) + description = from_clip('description') or self._og_search_description(webpage) + + duration = float_or_none(from_clip('duration') or self._html_search_meta( + 'weibo:audio:duration', webpage)) + + uploader = from_clip('author') or self._og_search_property( + 'audio:artist', webpage, 'uploader', fatal=False) + uploader_url = from_clip('author_url') or self._html_search_meta( + 'audioboo:channel', webpage, 'uploader url') + + return { + 'id': video_id, + 'url': audio_url, + 'title': title, + 'description': description, + 'duration': duration, + 'uploader': uploader, + 'uploader_url': uploader_url, + } diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dl/extractor/audiomack.py new file mode 100644 index 000000000..cc7771354 --- /dev/null +++ b/youtube_dl/extractor/audiomack.py @@ -0,0 +1,145 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import time + +from .common import InfoExtractor +from .soundcloud import SoundcloudIE +from ..compat import compat_str +from ..utils import ( + ExtractorError, + url_basename, +) + + +class AudiomackIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P[\w/-]+)' + IE_NAME = 'audiomack' + _TESTS = [ + # hosted on audiomack + { + 'url': 'http://www.audiomack.com/song/roosh-williams/extraordinary', + 'info_dict': + { + 'id': '310086', + 'ext': 'mp3', + 'uploader': 'Roosh Williams', + 'title': 'Extraordinary' + } + }, + # audiomack wrapper around soundcloud song + { + 'add_ie': ['Soundcloud'], + 'url': 'http://www.audiomack.com/song/hip-hop-daily/black-mamba-freestyle', + 'info_dict': { + 'id': '258901379', + 'ext': 'mp3', + 'description': 'mamba day freestyle for the legend Kobe Bryant ', + 'title': 'Black Mamba Freestyle [Prod. 
By Danny Wolf]', + 'uploader': 'ILOVEMAKONNEN', + 'upload_date': '20160414', + } + }, + ] + + def _real_extract(self, url): + # URLs end with [uploader name]/[uploader title] + # this title is whatever the user types in, and is rarely + # the proper song title. Real metadata is in the api response + album_url_tag = self._match_id(url) + + # Request the extended version of the api for extra fields like artist and title + api_response = self._download_json( + 'http://www.audiomack.com/api/music/url/song/%s?extended=1&_=%d' % ( + album_url_tag, time.time()), + album_url_tag) + + # API is inconsistent with errors + if 'url' not in api_response or not api_response['url'] or 'error' in api_response: + raise ExtractorError('Invalid url %s' % url) + + # Audiomack wraps a lot of soundcloud tracks in their branded wrapper + # if so, pass the work off to the soundcloud extractor + if SoundcloudIE.suitable(api_response['url']): + return self.url_result(api_response['url'], SoundcloudIE.ie_key()) + + return { + 'id': compat_str(api_response.get('id', album_url_tag)), + 'uploader': api_response.get('artist'), + 'title': api_response.get('title'), + 'url': api_response['url'], + } + + +class AudiomackAlbumIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P[\w/-]+)' + IE_NAME = 'audiomack:album' + _TESTS = [ + # Standard album playlist + { + 'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape', + 'playlist_count': 15, + 'info_dict': + { + 'id': '812251', + 'title': 'Tha Tour: Part 2 (Official Mixtape)' + } + }, + # Album playlist ripped from fakeshoredrive with no metadata + { + 'url': 'http://www.audiomack.com/album/fakeshoredrive/ppp-pistol-p-project', + 'info_dict': { + 'title': 'PPP (Pistol P Project)', + 'id': '837572', + }, + 'playlist': [{ + 'info_dict': { + 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)', + 'id': '837577', + 'ext': 'mp3', + 'uploader': 'Lil Herb a.k.a. G Herbo', + } + }], + 'params': { + 'playliststart': 9, + 'playlistend': 9, + } + } + ] + + def _real_extract(self, url): + # URLs end with [uploader name]/[uploader title] + # this title is whatever the user types in, and is rarely + # the proper song title. 
Real metadata is in the api response + album_url_tag = self._match_id(url) + result = {'_type': 'playlist', 'entries': []} + # There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata + # Therefore we don't know how many songs the album has and must infi-loop until failure + for track_no in itertools.count(): + # Get song's metadata + api_response = self._download_json( + 'http://www.audiomack.com/api/music/url/album/%s/%d?extended=1&_=%d' + % (album_url_tag, track_no, time.time()), album_url_tag, + note='Querying song information (%d)' % (track_no + 1)) + + # Total failure, only occurs when url is totally wrong + # Won't happen in middle of valid playlist (next case) + if 'url' not in api_response or 'error' in api_response: + raise ExtractorError('Invalid url for track %d of album url %s' % (track_no, url)) + # URL is good but song id doesn't exist - usually means end of playlist + elif not api_response['url']: + break + else: + # Pull out the album metadata and add to result (if it exists) + for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: + if apikey in api_response and resultkey not in result: + result[resultkey] = api_response[apikey] + song_id = url_basename(api_response['url']).rpartition('.')[0] + result['entries'].append({ + 'id': compat_str(api_response.get('id', song_id)), + 'uploader': api_response.get('artist'), + 'title': api_response.get('title', song_id), + 'url': api_response['url'], + }) + return result diff --git a/youtube_dl/extractor/awaan.py b/youtube_dl/extractor/awaan.py new file mode 100644 index 000000000..a2603bbff --- /dev/null +++ b/youtube_dl/extractor/awaan.py @@ -0,0 +1,185 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import base64 + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode, + compat_str, +) +from ..utils import ( + int_or_none, + parse_iso8601, + smuggle_url, + unsmuggle_url, + urlencode_postdata, +) + + +class AWAANIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P\d+)/[^/]+(?:/(?P\d+)/(?P\d+))?' 
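+    # The three named groups drive the routing in _real_extract below: a
+    # positive video id is delegated to AWAANVideo, a season id is forwarded
+    # (with the show id smuggled alongside) to AWAANSeason, and a bare show
+    # URL falls through to AWAANSeason as well.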
+ + def _real_extract(self, url): + show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() + if video_id and int(video_id) > 0: + return self.url_result( + 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo') + elif season_id and int(season_id) > 0: + return self.url_result(smuggle_url( + 'http://awaan.ae/program/season/%s' % season_id, + {'show_id': show_id}), 'AWAANSeason') + else: + return self.url_result( + 'http://awaan.ae/program/%s' % show_id, 'AWAANSeason') + + +class AWAANBaseIE(InfoExtractor): + def _parse_video_data(self, video_data, video_id, is_live): + title = video_data.get('title_en') or video_data['title_ar'] + img = video_data.get('img') + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'description': video_data.get('description_en') or video_data.get('description_ar'), + 'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None, + 'duration': int_or_none(video_data.get('duration')), + 'timestamp': parse_iso8601(video_data.get('create_time'), ' '), + 'is_live': is_live, + } + + +class AWAANVideoIE(AWAANBaseIE): + IE_NAME = 'awaan:video' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', + 'md5': '5f61c33bfc7794315c671a62d43116aa', + 'info_dict': + { + 'id': '17375', + 'ext': 'mp4', + 'title': 'رحلة العمر : الحلقة 1', + 'description': 'md5:0156e935d870acb8ef0a66d24070c6d6', + 'duration': 2041, + 'timestamp': 1227504126, + 'upload_date': '20081124', + 'uploader_id': '71', + }, + }, { + 'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_data = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/video?id=%s' % video_id, + video_id, headers={'Origin': 'http://awaan.ae'}) + info = self._parse_video_data(video_data, video_id, False) + + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/video?' + compat_urllib_parse_urlencode({ + 'id': video_data['id'], + 'user_id': video_data['user_id'], + 'signature': video_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloVideo', + }) + return info + + +class AWAANLiveIE(AWAANBaseIE): + IE_NAME = 'awaan:live' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P\d+)' + _TEST = { + 'url': 'http://awaan.ae/live/6/dubai-tv', + 'info_dict': { + 'id': '6', + 'ext': 'mp4', + 'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'upload_date': '20150107', + 'timestamp': 1420588800, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + channel_id = self._match_id(url) + + channel_data = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/getchanneldetails?channel_id=%s' % channel_id, + channel_id, headers={'Origin': 'http://awaan.ae'}) + info = self._parse_video_data(channel_data, channel_id, True) + + embed_url = 'http://admin.mangomolo.com/analytics/index.php/customers/embed/index?' 
+ compat_urllib_parse_urlencode({ + 'id': base64.b64encode(channel_data['user_id'].encode()).decode(), + 'channelid': base64.b64encode(channel_data['id'].encode()).decode(), + 'signature': channel_data['signature'], + 'countries': 'Q0M=', + 'filter': 'DENY', + }) + info.update({ + '_type': 'url_transparent', + 'url': embed_url, + 'ie_key': 'MangomoloLive', + }) + return info + + +class AWAANSeasonIE(InfoExtractor): + IE_NAME = 'awaan:season' + _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P\d+)|season/(?P\d+))' + _TEST = { + 'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', + 'info_dict': + { + 'id': '7910', + 'title': 'محاضرات الشيخ الشعراوي', + }, + 'playlist_mincount': 27, + } + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + show_id, season_id = re.match(self._VALID_URL, url).groups() + + data = {} + if season_id: + data['season'] = season_id + show_id = smuggled_data.get('show_id') + if show_id is None: + season = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/season_info?id=%s' % season_id, + season_id, headers={'Origin': 'http://awaan.ae'}) + show_id = season['id'] + data['show_id'] = show_id + show = self._download_json( + 'http://admin.mangomolo.com/analytics/index.php/plus/show', + show_id, data=urlencode_postdata(data), headers={ + 'Origin': 'http://awaan.ae', + 'Content-Type': 'application/x-www-form-urlencoded' + }) + if not season_id: + season_id = show['default_season'] + for season in show['seasons']: + if season['id'] == season_id: + title = season.get('title_en') or season['title_ar'] + + entries = [] + for video in show['videos']: + video_id = compat_str(video['id']) + entries.append(self.url_result( + 'http://awaan.ae/media/%s' % video_id, 'AWAANVideo', video_id)) + + return self.playlist_result(entries, season_id, title) diff --git a/youtube_dl/extractor/aws.py b/youtube_dl/extractor/aws.py new file mode 100644 index 000000000..dccfeaf73 --- /dev/null +++ b/youtube_dl/extractor/aws.py @@ -0,0 +1,78 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import datetime +import hashlib +import hmac + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlencode + + +class AWSIE(InfoExtractor): + _AWS_ALGORITHM = 'AWS4-HMAC-SHA256' + _AWS_REGION = 'us-east-1' + + def _aws_execute_api(self, aws_dict, video_id, query=None): + query = query or {} + amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ') + date = amz_date[:8] + headers = { + 'Accept': 'application/json', + 'Host': self._AWS_PROXY_HOST, + 'X-Amz-Date': amz_date, + 'X-Api-Key': self._AWS_API_KEY + } + session_token = aws_dict.get('session_token') + if session_token: + headers['X-Amz-Security-Token'] = session_token + + def aws_hash(s): + return hashlib.sha256(s.encode('utf-8')).hexdigest() + + # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html + canonical_querystring = compat_urllib_parse_urlencode(query) + canonical_headers = '' + for header_name, header_value in sorted(headers.items()): + canonical_headers += '%s:%s\n' % (header_name.lower(), header_value) + signed_headers = ';'.join([header.lower() for header in sorted(headers.keys())]) + canonical_request = '\n'.join([ + 'GET', + aws_dict['uri'], + canonical_querystring, + canonical_headers, + signed_headers, + aws_hash('') + ]) + + # Task 2: 
http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html + credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request'] + credential_scope = '/'.join(credential_scope_list) + string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)]) + + # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html + def aws_hmac(key, msg): + return hmac.new(key, msg.encode('utf-8'), hashlib.sha256) + + def aws_hmac_digest(key, msg): + return aws_hmac(key, msg).digest() + + def aws_hmac_hexdigest(key, msg): + return aws_hmac(key, msg).hexdigest() + + k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8') + for value in credential_scope_list: + k_signing = aws_hmac_digest(k_signing, value) + + signature = aws_hmac_hexdigest(k_signing, string_to_sign) + + # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html + headers['Authorization'] = ', '.join([ + '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope), + 'SignedHeaders=%s' % signed_headers, + 'Signature=%s' % signature, + ]) + + return self._download_json( + 'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''), + video_id, headers=headers) diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py new file mode 100644 index 000000000..fcbdc71b9 --- /dev/null +++ b/youtube_dl/extractor/azmedien.py @@ -0,0 +1,86 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from .kaltura import KalturaIE + + +class AZMedienIE(InfoExtractor): + IE_DESC = 'AZ Medien videos' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?P + telezueri\.ch| + telebaern\.tv| + telem1\.ch + )/ + [^/]+/ + (?P + [^/]+-(?P\d+) + ) + (?: + \#video= + (?P + [_0-9a-z]+ + ) + )? + ''' + + _TESTS = [{ + 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569', + 'info_dict': { + 'id': '1_anruz3wy', + 'ext': 'mp4', + 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen', + 'uploader_id': 'TVOnline', + 'upload_date': '20180930', + 'timestamp': 1538328802, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1', + 'only_matching': True + }] + + _PARTNER_ID = '1719221' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + video_id = mobj.group('id') + entry_id = mobj.group('kaltura_id') + + if not entry_id: + api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0]) + payload = { + 'query': '''query VideoContext($articleId: ID!) { + article: node(id: $articleId) { + ... on Article { + mainAssetRelation { + asset { + ... 
on VideoAsset { + kalturaId + } + } + } + } + } + }''', + 'variables': {'articleId': 'Article:%s' % mobj.group('article_id')}, + } + json_data = self._download_json( + api_url, video_id, headers={ + 'Content-Type': 'application/json', + }, + data=json.dumps(payload).encode()) + entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId'] + + return self.url_result( + 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id), + ie=KalturaIE.ie_key(), video_id=entry_id) diff --git a/youtube_dl/extractor/baidu.py b/youtube_dl/extractor/baidu.py new file mode 100644 index 000000000..234a661d3 --- /dev/null +++ b/youtube_dl/extractor/baidu.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import unescapeHTML + + +class BaiduVideoIE(InfoExtractor): + IE_DESC = '百度视频' + _VALID_URL = r'https?://v\.baidu\.com/(?P[a-z]+)/(?P\d+)\.htm' + _TESTS = [{ + 'url': 'http://v.baidu.com/comic/1069.htm?frp=bdbrand&q=%E4%B8%AD%E5%8D%8E%E5%B0%8F%E5%BD%93%E5%AE%B6', + 'info_dict': { + 'id': '1069', + 'title': '中华小当家 TV版国语', + 'description': 'md5:51be07afe461cf99fa61231421b5397c', + }, + 'playlist_count': 52, + }, { + 'url': 'http://v.baidu.com/show/11595.htm?frp=bdbrand', + 'info_dict': { + 'id': '11595', + 'title': 're:^奔跑吧兄弟', + 'description': 'md5:1bf88bad6d850930f542d51547c089b8', + }, + 'playlist_mincount': 12, + }] + + def _call_api(self, path, category, playlist_id, note): + return self._download_json('http://app.video.baidu.com/%s/?worktype=adnative%s&id=%s' % ( + path, category, playlist_id), playlist_id, note) + + def _real_extract(self, url): + category, playlist_id = re.match(self._VALID_URL, url).groups() + if category == 'show': + category = 'tvshow' + if category == 'tv': + category = 'tvplay' + + playlist_detail = self._call_api( + 'xqinfo', category, playlist_id, 'Download playlist JSON metadata') + + playlist_title = playlist_detail['title'] + playlist_description = unescapeHTML(playlist_detail.get('intro')) + + episodes_detail = self._call_api( + 'xqsingle', category, playlist_id, 'Download episodes JSON metadata') + + entries = [self.url_result( + episode['url'], video_title=episode['title'] + ) for episode in episodes_detail['videos']] + + return self.playlist_result( + entries, playlist_id, playlist_title, playlist_description) diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py new file mode 100644 index 000000000..4400ff9c1 --- /dev/null +++ b/youtube_dl/extractor/bambuser.py @@ -0,0 +1,142 @@ +from __future__ import unicode_literals + +import re +import itertools + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + sanitized_Request, + urlencode_postdata, +) + + +class BambuserIE(InfoExtractor): + IE_NAME = 'bambuser' + _VALID_URL = r'https?://bambuser\.com/v/(?P\d+)' + _API_KEY = '005f64509e19a868399060af746a00aa' + _LOGIN_URL = 'https://bambuser.com/user' + _NETRC_MACHINE = 'bambuser' + + _TEST = { + 'url': 'http://bambuser.com/v/4050584', + # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388 + # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641', + 'info_dict': { + 'id': '4050584', + 'ext': 'flv', + 'title': 'Education engineering days - lightning talks', + 'duration': 3741, + 'uploader': 'pixelversity', + 'uploader_id': '344706', + 'timestamp': 1382976692, + 'upload_date': '20131028', + 'view_count': int, + }, + 'params': { + # It 
doesn't respect the 'Range' header, it would download the whole video + # caused the travis builds to fail: https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59 + 'skip_download': True, + }, + } + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_form = { + 'form_id': 'user_login', + 'op': 'Log in', + 'name': username, + 'pass': password, + } + + request = sanitized_Request( + self._LOGIN_URL, urlencode_postdata(login_form)) + request.add_header('Referer', self._LOGIN_URL) + response = self._download_webpage( + request, None, 'Logging in') + + login_error = self._html_search_regex( + r'(?s)
<div class="messages error">(.+?)</div>
    ', + response, 'login error', default=None) + if login_error: + raise ExtractorError( + 'Unable to login: %s' % login_error, expected=True) + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + video_id = self._match_id(url) + + info = self._download_json( + 'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s' + % (self._API_KEY, video_id), video_id) + + error = info.get('error') + if error: + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error), expected=True) + + result = info['result'] + + return { + 'id': video_id, + 'title': result['title'], + 'url': result['url'], + 'thumbnail': result.get('preview'), + 'duration': int_or_none(result.get('length')), + 'uploader': result.get('username'), + 'uploader_id': compat_str(result.get('owner', {}).get('uid')), + 'timestamp': int_or_none(result.get('created')), + 'fps': float_or_none(result.get('framerate')), + 'view_count': int_or_none(result.get('views_total')), + 'comment_count': int_or_none(result.get('comment_count')), + } + + +class BambuserChannelIE(InfoExtractor): + IE_NAME = 'bambuser:channel' + _VALID_URL = r'https?://bambuser\.com/channel/(?P.*?)(?:/|#|\?|$)' + # The maximum number we can get with each request + _STEP = 50 + _TEST = { + 'url': 'http://bambuser.com/channel/pixelversity', + 'info_dict': { + 'title': 'pixelversity', + }, + 'playlist_mincount': 60, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user = mobj.group('user') + urls = [] + last_id = '' + for i in itertools.count(1): + req_url = ( + 'http://bambuser.com/xhr-api/index.php?username={user}' + '&sort=created&access_mode=0%2C1%2C2&limit={count}' + '&method=broadcast&format=json&vid_older_than={last}' + ).format(user=user, count=self._STEP, last=last_id) + req = sanitized_Request(req_url) + # Without setting this header, we wouldn't get any result + req.add_header('Referer', 'http://bambuser.com/channel/%s' % user) + data = self._download_json( + req, user, 'Downloading page %d' % i) + results = data['result'] + if not results: + break + last_id = results[-1]['vid'] + urls.extend(self.url_result(v['page'], 'Bambuser') for v in results) + + return { + '_type': 'playlist', + 'title': user, + 'entries': urls, + } diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py new file mode 100644 index 000000000..f14b407dc --- /dev/null +++ b/youtube_dl/extractor/bandcamp.py @@ -0,0 +1,417 @@ +from __future__ import unicode_literals + +import random +import re +import time + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + KNOWN_EXTENSIONS, + parse_filesize, + str_or_none, + try_get, + unescapeHTML, + update_url_query, + unified_strdate, + unified_timestamp, + url_or_none, +) + + +class BandcampIE(InfoExtractor): + _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song', + 'md5': 'c557841d5e50261777a6585648adf439', + 'info_dict': { + 'id': '1812978515', + 'ext': 'mp3', + 'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad", + 'duration': 9.8485, + }, + '_skip': 'There is a limit of 200 free downloads / month for the test song' + }, { + # free download + 'url': 'http://benprunty.bandcamp.com/track/lanius-battle', + 'md5': '853e35bf34aa1d6fe2615ae612564b36', + 'info_dict': { + 'id': '2650410135', + 'ext': 'aiff', + 
'title': 'Ben Prunty - Lanius (Battle)', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Ben Prunty', + 'timestamp': 1396508491, + 'upload_date': '20140403', + 'release_date': '20140403', + 'duration': 260.877, + 'track': 'Lanius (Battle)', + 'track_number': 1, + 'track_id': '2650410135', + 'artist': 'Ben Prunty', + 'album': 'FTL: Advanced Edition Soundtrack', + }, + }, { + # no free download, mp3 128 + 'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire', + 'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7', + 'info_dict': { + 'id': '2584466013', + 'ext': 'mp3', + 'title': 'Mastodon - Hail to Fire', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Mastodon', + 'timestamp': 1322005399, + 'upload_date': '20111122', + 'release_date': '20040207', + 'duration': 120.79, + 'track': 'Hail to Fire', + 'track_number': 5, + 'track_id': '2584466013', + 'artist': 'Mastodon', + 'album': 'Call of the Mastodon', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + title = mobj.group('title') + webpage = self._download_webpage(url, title) + thumbnail = self._html_search_meta('og:image', webpage, default=None) + + track_id = None + track = None + track_number = None + duration = None + + formats = [] + track_info = self._parse_json( + self._search_regex( + r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n', + webpage, 'track info', default='{}'), title) + if track_info: + file_ = track_info.get('file') + if isinstance(file_, dict): + for format_id, format_url in file_.items(): + if not url_or_none(format_url): + continue + ext, abr_str = format_id.split('-', 1) + formats.append({ + 'format_id': format_id, + 'url': self._proto_relative_url(format_url, 'http:'), + 'ext': ext, + 'vcodec': 'none', + 'acodec': ext, + 'abr': int_or_none(abr_str), + }) + track = track_info.get('title') + track_id = str_or_none(track_info.get('track_id') or track_info.get('id')) + track_number = int_or_none(track_info.get('track_num')) + duration = float_or_none(track_info.get('duration')) + + def extract(key): + return self._search_regex( + r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key, + webpage, key, default=None, group='value') + + artist = extract('artist') + album = extract('album_title') + timestamp = unified_timestamp( + extract('publish_date') or extract('album_publish_date')) + release_date = unified_strdate(extract('album_release_date')) + + download_link = self._search_regex( + r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'download link', default=None, group='url') + if download_link: + track_id = self._search_regex( + r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$', + webpage, 'track id') + + download_webpage = self._download_webpage( + download_link, track_id, 'Downloading free downloads page') + + blob = self._parse_json( + self._search_regex( + r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage, + 'blob', group='blob'), + track_id, transform_source=unescapeHTML) + + info = try_get( + blob, (lambda x: x['digital_items'][0], + lambda x: x['download_items'][0]), dict) + if info: + downloads = info.get('downloads') + if isinstance(downloads, dict): + if not track: + track = info.get('title') + if not artist: + artist = info.get('artist') + if not thumbnail: + thumbnail = info.get('thumb_url') + + download_formats = {} + download_formats_list = blob.get('download_formats') + if isinstance(download_formats_list, list): + for f in blob['download_formats']: + name, ext = f.get('name'), f.get('file_extension') + if 
all(isinstance(x, compat_str) for x in (name, ext)): + download_formats[name] = ext.strip('.') + + for format_id, f in downloads.items(): + format_url = f.get('url') + if not format_url: + continue + # Stat URL generation algorithm is reverse engineered from + # download_*_bundle_*.js + stat_url = update_url_query( + format_url.replace('/download/', '/statdownload/'), { + '.rand': int(time.time() * 1000 * random.random()), + }) + format_id = f.get('encoding_name') or format_id + stat = self._download_json( + stat_url, track_id, 'Downloading %s JSON' % format_id, + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], + fatal=False) + if not stat: + continue + retry_url = url_or_none(stat.get('retry_url')) + if not retry_url: + continue + formats.append({ + 'url': self._proto_relative_url(retry_url, 'http:'), + 'ext': download_formats.get(format_id), + 'format_id': format_id, + 'format_note': f.get('description'), + 'filesize': parse_filesize(f.get('size_mb')), + 'vcodec': 'none', + }) + + self._sort_formats(formats) + + title = '%s - %s' % (artist, track) if artist else track + + if not duration: + duration = float_or_none(self._html_search_meta( + 'duration', webpage, default=None)) + + return { + 'id': track_id, + 'title': title, + 'thumbnail': thumbnail, + 'uploader': artist, + 'timestamp': timestamp, + 'release_date': release_date, + 'duration': duration, + 'track': track, + 'track_number': track_number, + 'track_id': track_id, + 'artist': artist, + 'album': album, + 'formats': formats, + } + + +class BandcampAlbumIE(InfoExtractor): + IE_NAME = 'Bandcamp:album' + _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?' + + _TESTS = [{ + 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', + 'playlist': [ + { + 'md5': '39bc1eded3476e927c724321ddf116cf', + 'info_dict': { + 'id': '1353101989', + 'ext': 'mp3', + 'title': 'Intro', + } + }, + { + 'md5': '1a2c32e2691474643e912cc6cd4bffaa', + 'info_dict': { + 'id': '38097443', + 'ext': 'mp3', + 'title': 'Kero One - Keep It Alive (Blazo remix)', + } + }, + ], + 'info_dict': { + 'title': 'Jazz Format Mixtape vol.1', + 'id': 'jazz-format-mixtape-vol-1', + 'uploader_id': 'blazo', + }, + 'params': { + 'playlistend': 2 + }, + 'skip': 'Bandcamp imposes download limits.' 
+ }, { + 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', + 'info_dict': { + 'title': 'Hierophany of the Open Grave', + 'uploader_id': 'nightbringer', + 'id': 'hierophany-of-the-open-grave', + }, + 'playlist_mincount': 9, + }, { + 'url': 'http://dotscale.bandcamp.com', + 'info_dict': { + 'title': 'Loom', + 'id': 'dotscale', + 'uploader_id': 'dotscale', + }, + 'playlist_mincount': 7, + }, { + # with escaped quote in title + 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep', + 'info_dict': { + 'title': '"Entropy" EP', + 'uploader_id': 'jstrecords', + 'id': 'entropy-ep', + }, + 'playlist_mincount': 3, + }, { + # not all tracks have songs + 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague', + 'info_dict': { + 'id': 'we-are-the-plague', + 'title': 'WE ARE THE PLAGUE', + 'uploader_id': 'insulters', + }, + 'playlist_count': 2, + }] + + @classmethod + def suitable(cls, url): + return (False + if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url) + else super(BandcampAlbumIE, cls).suitable(url)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + uploader_id = mobj.group('subdomain') + album_id = mobj.group('album_id') + playlist_id = album_id or uploader_id + webpage = self._download_webpage(url, playlist_id) + track_elements = re.findall( + r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage) + if not track_elements: + raise ExtractorError('The page doesn\'t contain any tracks') + # Only tracks with duration info have songs + entries = [ + self.url_result( + compat_urlparse.urljoin(url, t_path), + ie=BandcampIE.ie_key(), + video_title=self._search_regex( + r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)', + elem_content, 'track title', fatal=False)) + for elem_content, t_path in track_elements + if self._html_search_meta('duration', elem_content, default=None)] + + title = self._html_search_regex( + r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"', + webpage, 'title', fatal=False) + if title: + title = title.replace(r'\"', '"') + return { + '_type': 'playlist', + 'uploader_id': uploader_id, + 'id': playlist_id, + 'title': title, + 'entries': entries, + } + + +class BandcampWeeklyIE(InfoExtractor): + IE_NAME = 'Bandcamp:weekly' + _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://bandcamp.com/?show=224', + 'md5': 'b00df799c733cf7e0c567ed187dea0fd', + 'info_dict': { + 'id': '224', + 'ext': 'opus', + 'title': 'BC Weekly April 4th 2017 - Magic Moments', + 'description': 'md5:5d48150916e8e02d030623a48512c874', + 'duration': 5829.77, + 'release_date': '20170404', + 'series': 'Bandcamp Weekly', + 'episode': 'Magic Moments', + 'episode_number': 208, + 'episode_id': '224', + } + }, { + 'url': 'https://bandcamp.com/?blah/blah@&show=228', + 'only_matching': True + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + blob = self._parse_json( + self._search_regex( + r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage, + 'blob', group='blob'), + video_id, transform_source=unescapeHTML) + + show = blob['bcw_show'] + + # This is desired because any invalid show id redirects to `bandcamp.com` + # which happens to expose the latest Bandcamp Weekly episode. 
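+        # i.e. prefer the show id reported by the page blob and fall back to
+        # the id parsed from the URL only when the blob does not provide one.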
+ show_id = int_or_none(show.get('show_id')) or int_or_none(video_id) + + formats = [] + for format_id, format_url in show['audio_stream'].items(): + if not url_or_none(format_url): + continue + for known_ext in KNOWN_EXTENSIONS: + if known_ext in format_id: + ext = known_ext + break + else: + ext = None + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': ext, + 'vcodec': 'none', + }) + self._sort_formats(formats) + + title = show.get('audio_title') or 'Bandcamp Weekly' + subtitle = show.get('subtitle') + if subtitle: + title += ' - %s' % subtitle + + episode_number = None + seq = blob.get('bcw_seq') + + if seq and isinstance(seq, list): + try: + episode_number = next( + int_or_none(e.get('episode_number')) + for e in seq + if isinstance(e, dict) and int_or_none(e.get('id')) == show_id) + except StopIteration: + pass + + return { + 'id': video_id, + 'title': title, + 'description': show.get('desc') or show.get('short_desc'), + 'duration': float_or_none(show.get('audio_duration')), + 'is_live': False, + 'release_date': unified_strdate(show.get('published_date')), + 'series': 'Bandcamp Weekly', + 'episode': show.get('subtitle'), + 'episode_number': episode_number, + 'episode_id': compat_str(video_id), + 'formats': formats + } diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py new file mode 100644 index 000000000..e76507951 --- /dev/null +++ b/youtube_dl/extractor/bbc.py @@ -0,0 +1,1344 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + dict_get, + ExtractorError, + float_or_none, + get_element_by_class, + int_or_none, + js_to_json, + parse_duration, + parse_iso8601, + try_get, + unescapeHTML, + url_or_none, + urlencode_postdata, + urljoin, +) +from ..compat import ( + compat_etree_Element, + compat_HTTPError, + compat_urlparse, +) + + +class BBCCoUkIE(InfoExtractor): + IE_NAME = 'bbc.co.uk' + IE_DESC = 'BBC iPlayer' + _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?bbc\.co\.uk/ + (?: + programmes/(?!articles/)| + iplayer(?:/[^/]+)?/(?:episode/|playlist/)| + music/(?:clips|audiovideo/popular)[/#]| + radio/player/| + events/[^/]+/play/[^/]+/ + ) + (?P<id>%s)(?!/(?:episodes|broadcasts|clips)) + ''' % _ID_REGEX + + _LOGIN_URL = 'https://account.bbc.com/signin' + _NETRC_MACHINE = 'bbc' + + _MEDIASELECTOR_URLS = [ + # Provides HQ HLS streams with even better quality that pc mediaset but fails + # with geolocation in some cases when it's even not geo restricted at all (e.g. + # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable. 
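+        # These are tried in order by _download_media_selector(): when one
+        # fails with notukerror/geolocation/selectionunavailable, the next
+        # mediaset is attempted (iptv-all first, then pc).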
+ 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s', + 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s', + ] + + _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection' + _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist' + + _NAMESPACES = ( + _MEDIASELECTION_NS, + _EMP_PLAYLIST_NS, + ) + + _TESTS = [ + { + 'url': 'http://www.bbc.co.uk/programmes/b039g8p7', + 'info_dict': { + 'id': 'b039d07m', + 'ext': 'flv', + 'title': 'Leonard Cohen, Kaleidoscope - BBC Radio 4', + 'description': 'The Canadian poet and songwriter reflects on his musical career.', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, + { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/', + 'info_dict': { + 'id': 'b00yng1d', + 'ext': 'flv', + 'title': 'The Man in Black: Series 3: The Printed Name', + 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.", + 'duration': 1800, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Episode is no longer available on BBC iPlayer Radio', + }, + { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/', + 'info_dict': { + 'id': 'b00yng1d', + 'ext': 'flv', + 'title': 'The Voice UK: Series 3: Blind Auditions 5', + 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.', + 'duration': 5100, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + }, + { + 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion', + 'info_dict': { + 'id': 'b03k3pb7', + 'ext': 'flv', + 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction", + 'description': '2. 
Invasion', + 'duration': 3600, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only', + }, { + 'url': 'http://www.bbc.co.uk/programmes/b04v20dw', + 'info_dict': { + 'id': 'b04v209v', + 'ext': 'flv', + 'title': 'Pete Tong, The Essential New Tune Special', + 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!", + 'duration': 10800, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Episode is no longer available on BBC iPlayer Radio', + }, { + 'url': 'http://www.bbc.co.uk/music/clips/p022h44b', + 'note': 'Audio', + 'info_dict': { + 'id': 'p022h44j', + 'ext': 'flv', + 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances', + 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.", + 'duration': 227, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz', + 'note': 'Video', + 'info_dict': { + 'id': 'p025c103', + 'ext': 'flv', + 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)', + 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014', + 'duration': 226, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls', + 'info_dict': { + 'id': 'p02n76xf', + 'ext': 'flv', + 'title': 'Natural World, 2015-2016: 2. Super Powered Owls', + 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d', + 'duration': 3540, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'geolocation', + }, { + 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition', + 'info_dict': { + 'id': 'b05zmgw1', + 'ext': 'flv', + 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.', + 'title': 'Royal Academy Summer Exhibition', + 'duration': 3540, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'geolocation', + }, { + # iptv-all mediaset fails with geolocation however there is no geo restriction + # for this programme at all + 'url': 'http://www.bbc.co.uk/programmes/b06rkn85', + 'info_dict': { + 'id': 'b06rkms3', + 'ext': 'flv', + 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1", + 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!", + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Now it\'s really geo-restricted', + }, { + # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147) + 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', + 'info_dict': { + 'id': 'p028bfkj', + 'ext': 'flv', + 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', + 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, { + 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4', + 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3', + 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo', + 'only_matching': True, + }, { + 'url': 
'http://www.bbc.co.uk/radio/player/p03cchwf', + 'only_matching': True, + }, { + 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55', + 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9', + 'only_matching': True, + }, { + 'url': 'https://www.bbc.co.uk/programmes/m00005xn', + 'only_matching': True, + }, { + 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s', + 'only_matching': True, + }] + + _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8' + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading signin page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'username': username, + 'password': password, + }) + + post_url = urljoin(self._LOGIN_URL, self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, + 'post url', default=self._LOGIN_URL, group='url')) + + response, urlh = self._download_webpage_handle( + post_url, None, 'Logging in', data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + if self._LOGIN_URL in urlh.geturl(): + error = clean_html(get_element_by_class('form-message', response)) + if error: + raise ExtractorError( + 'Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + def _real_initialize(self): + self._login() + + class MediaSelectionError(Exception): + def __init__(self, id): + self.id = id + + def _extract_asx_playlist(self, connection, programme_id): + asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist') + return [ref.get('href') for ref in asx.findall('./Entry/ref')] + + def _extract_items(self, playlist): + return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS) + + def _findall_ns(self, element, xpath): + elements = [] + for ns in self._NAMESPACES: + elements.extend(element.findall(xpath % ns)) + return elements + + def _extract_medias(self, media_selection): + error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS) + if error is None: + media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS) + if error is not None: + raise BBCCoUkIE.MediaSelectionError(error.get('id')) + return self._findall_ns(media_selection, './{%s}media') + + def _extract_connections(self, media): + return self._findall_ns(media, './{%s}connection') + + def _get_subtitles(self, media, programme_id): + subtitles = {} + for connection in self._extract_connections(media): + cc_url = url_or_none(connection.get('href')) + if not cc_url: + continue + captions = self._download_xml( + cc_url, programme_id, 'Downloading captions', fatal=False) + if not isinstance(captions, compat_etree_Element): + continue + lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') + subtitles[lang] = [ + { + 'url': connection.get('href'), + 'ext': 'ttml', + }, + ] + return subtitles + + def _raise_extractor_error(self, media_selection_error): + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, media_selection_error.id), + expected=True) + + def _download_media_selector(self, programme_id): + last_exception = None + for mediaselector_url in self._MEDIASELECTOR_URLS: + try: + return self._download_media_selector_url( + mediaselector_url % programme_id, programme_id) + except BBCCoUkIE.MediaSelectionError as e: + if e.id in ('notukerror', 'geolocation', 'selectionunavailable'): + last_exception = e + continue + self._raise_extractor_error(e) + 
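An aside on the loop above, which finishes with the `_raise_extractor_error(last_exception)` call just below: `_download_media_selector` tries each mediaselector endpoint in turn, treats the listed geo/availability error ids as soft failures worth retrying on the next endpoint, and re-raises only the last soft failure once all endpoints are exhausted. A minimal sketch of the same pattern outside the extractor framework, with `fetch` as a hypothetical stand-in for `_download_media_selector_url`:

    # Sketch only; SOFT_ERROR_IDS mirrors the ids the extractor retries on.
    SOFT_ERROR_IDS = ('notukerror', 'geolocation', 'selectionunavailable')

    class MediaSelectionError(Exception):
        def __init__(self, id):
            self.id = id

    def select_media(endpoint_templates, programme_id, fetch):
        last_exception = None
        for template in endpoint_templates:
            try:
                return fetch(template % programme_id)
            except MediaSelectionError as e:
                if e.id in SOFT_ERROR_IDS:
                    last_exception = e  # remember it, try the next endpoint
                    continue
                raise  # any other selection error is fatal immediately
        raise last_exception  # all endpoints failed softly

The real method wraps the final error in an `ExtractorError` via `_raise_extractor_error` rather than re-raising it directly.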
self._raise_extractor_error(last_exception) + + def _download_media_selector_url(self, url, programme_id=None): + media_selection = self._download_xml( + url, programme_id, 'Downloading media selection XML', + expected_status=(403, 404)) + return self._process_media_selector(media_selection, programme_id) + + def _process_media_selector(self, media_selection, programme_id): + formats = [] + subtitles = None + urls = [] + + for media in self._extract_medias(media_selection): + kind = media.get('kind') + if kind in ('video', 'audio'): + bitrate = int_or_none(media.get('bitrate')) + encoding = media.get('encoding') + service = media.get('service') + width = int_or_none(media.get('width')) + height = int_or_none(media.get('height')) + file_size = int_or_none(media.get('media_file_size')) + for connection in self._extract_connections(media): + href = connection.get('href') + if href in urls: + continue + if href: + urls.append(href) + conn_kind = connection.get('kind') + protocol = connection.get('protocol') + supplier = connection.get('supplier') + transfer_format = connection.get('transferFormat') + format_id = supplier or conn_kind or protocol + if service: + format_id = '%s_%s' % (service, format_id) + # ASX playlist + if supplier == 'asx': + for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)): + formats.append({ + 'url': ref, + 'format_id': 'ref%s_%s' % (i, format_id), + }) + elif transfer_format == 'dash': + formats.extend(self._extract_mpd_formats( + href, programme_id, mpd_id=format_id, fatal=False)) + elif transfer_format == 'hls': + formats.extend(self._extract_m3u8_formats( + href, programme_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False)) + if re.search(self._USP_RE, href): + usp_formats = self._extract_m3u8_formats( + re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href), + programme_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id=format_id, fatal=False) + for f in usp_formats: + if f.get('height') and f['height'] > 720: + continue + formats.append(f) + elif transfer_format == 'hds': + formats.extend(self._extract_f4m_formats( + href, programme_id, f4m_id=format_id, fatal=False)) + else: + if not service and not supplier and bitrate: + format_id += '-%d' % bitrate + fmt = { + 'format_id': format_id, + 'filesize': file_size, + } + if kind == 'video': + fmt.update({ + 'width': width, + 'height': height, + 'tbr': bitrate, + 'vcodec': encoding, + }) + else: + fmt.update({ + 'abr': bitrate, + 'acodec': encoding, + 'vcodec': 'none', + }) + if protocol in ('http', 'https'): + # Direct link + fmt.update({ + 'url': href, + }) + elif protocol == 'rtmp': + application = connection.get('application', 'ondemand') + auth_string = connection.get('authString') + identifier = connection.get('identifier') + server = connection.get('server') + fmt.update({ + 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string), + 'play_path': identifier, + 'app': '%s?%s' % (application, auth_string), + 'page_url': 'http://www.bbc.co.uk', + 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf', + 'rtmp_live': False, + 'ext': 'flv', + }) + else: + continue + formats.append(fmt) + elif kind == 'captions': + subtitles = self.extract_subtitles(media, programme_id) + return formats, subtitles + + def _download_playlist(self, playlist_id): + try: + playlist = self._download_json( + 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, + playlist_id, 'Downloading playlist JSON') + + 
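The `_USP_RE` substitution in the HLS branch above is worth a worked example: for Unified Streaming Platform packaged streams it rewrites the manifest URL to the canonical `/<name>.ism/<name>.m3u8` form, and the extra formats that manifest yields are then capped at 720p by the height check. A sketch with a made-up host and path:

    import re

    # Same pattern as _USP_RE above.
    USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'

    href = 'https://vod.example.invalid/b039d07m.ism.hlsv2.ism/master.m3u8'
    print(re.sub(USP_RE, r'/\1.ism/\1.m3u8', href))
    # -> https://vod.example.invalid/b039d07m.ism/b039d07m.m3u8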
version = playlist.get('defaultAvailableVersion') + if version: + smp_config = version['smpConfig'] + title = smp_config['title'] + description = smp_config['summary'] + for item in smp_config['items']: + kind = item['kind'] + if kind not in ('programme', 'radioProgramme'): + continue + programme_id = item.get('vpid') + duration = int_or_none(item.get('duration')) + formats, subtitles = self._download_media_selector(programme_id) + return programme_id, title, description, duration, formats, subtitles + except ExtractorError as ee: + if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): + raise + + # fallback to legacy playlist + return self._process_legacy_playlist(playlist_id) + + def _process_legacy_playlist_url(self, url, display_id): + playlist = self._download_legacy_playlist_url(url, display_id) + return self._extract_from_legacy_playlist(playlist, display_id) + + def _process_legacy_playlist(self, playlist_id): + return self._process_legacy_playlist_url( + 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id) + + def _download_legacy_playlist_url(self, url, playlist_id=None): + return self._download_xml( + url, playlist_id, 'Downloading legacy playlist XML') + + def _extract_from_legacy_playlist(self, playlist, playlist_id): + no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS) + if no_items is not None: + reason = no_items.get('reason') + if reason == 'preAvailability': + msg = 'Episode %s is not yet available' % playlist_id + elif reason == 'postAvailability': + msg = 'Episode %s is no longer available' % playlist_id + elif reason == 'noMedia': + msg = 'Episode %s is not currently available' % playlist_id + else: + msg = 'Episode %s is not available: %s' % (playlist_id, reason) + raise ExtractorError(msg, expected=True) + + for item in self._extract_items(playlist): + kind = item.get('kind') + if kind not in ('programme', 'radioProgramme'): + continue + title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text + description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS) + description = description_el.text if description_el is not None else None + + def get_programme_id(item): + def get_from_attributes(item): + for p in('identifier', 'group'): + value = item.get(p) + if value and re.match(r'^[pb][\da-z]{7}$', value): + return value + get_from_attributes(item) + mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS) + if mediator is not None: + return get_from_attributes(mediator) + + programme_id = get_programme_id(item) + duration = int_or_none(item.get('duration')) + + if programme_id: + formats, subtitles = self._download_media_selector(programme_id) + else: + formats, subtitles = self._process_media_selector(item, playlist_id) + programme_id = playlist_id + + return programme_id, title, description, duration, formats, subtitles + + def _real_extract(self, url): + group_id = self._match_id(url) + + webpage = self._download_webpage(url, group_id, 'Downloading video page') + + error = self._search_regex( + r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<', + webpage, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + + programme_id = None + duration = None + + tviplayer = self._search_regex( + r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById', + webpage, 'player', default=None) + + if tviplayer: + player = self._parse_json(tviplayer, group_id).get('player', {}) + duration = int_or_none(player.get('duration')) + programme_id = player.get('vpid') + + 
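One detail of `get_programme_id` above is easy to miss: the first `get_from_attributes(item)` call discards its result (there is no `return` in front of it), so in practice only the `mediator` element's attributes ever produce an id. The `r'^[pb][\da-z]{7}$'` validation itself accepts exactly the eight-character programme ids that appear in the tests, for example:

    import re

    def looks_like_pid(value):
        # Same validation as get_from_attributes() above.
        return bool(re.match(r'^[pb][\da-z]{7}$', value))

    assert looks_like_pid('b039d07m')
    assert looks_like_pid('p02n76xf')
    assert not looks_like_pid('x1234567')   # wrong leading letter
    assert not looks_like_pid('b039d07m2')  # too long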
if not programme_id: + programme_id = self._search_regex( + r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None) + + if programme_id: + formats, subtitles = self._download_media_selector(programme_id) + title = self._og_search_title(webpage, default=None) or self._html_search_regex( + (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>', + r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title') + description = self._search_regex( + (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>', + r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'), + webpage, 'description', default=None) + if not description: + description = self._html_search_meta('description', webpage) + else: + programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) + + self._sort_formats(formats) + + return { + 'id': programme_id, + 'title': title, + 'description': description, + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + } + + +class BBCIE(BBCCoUkIE): + IE_NAME = 'bbc' + IE_DESC = 'BBC' + _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)' + + _MEDIASELECTOR_URLS = [ + # Provides HQ HLS streams but fails with geolocation in some cases when it's + # even not geo restricted at all + 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s', + # Provides more formats, namely direct mp4 links, but fails on some videos with + # notukerror for non UK (?) users (e.g. + # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) + 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s', + # Provides fewer formats, but works everywhere for everybody (hopefully) + 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s', + ] + + _TESTS = [{ + # article with multiple videos embedded with data-playable containing vpids + 'url': 'http://www.bbc.com/news/world-europe-32668511', + 'info_dict': { + 'id': 'world-europe-32668511', + 'title': 'Russia stages massive WW2 parade despite Western boycott', + 'description': 'md5:00ff61976f6081841f759a08bf78cc9c', + }, + 'playlist_count': 2, + }, { + # article with multiple videos embedded with data-playable (more videos) + 'url': 'http://www.bbc.com/news/business-28299555', + 'info_dict': { + 'id': 'business-28299555', + 'title': 'Farnborough Airshow: Video highlights', + 'description': 'BBC reports and video highlights at the Farnborough Airshow.', + }, + 'playlist_count': 9, + 'skip': 'Save time', + }, { + # article with multiple videos embedded with `new SMP()` + # broken + 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460', + 'info_dict': { + 'id': '3662a707-0af9-3149-963f-47bea720b460', + 'title': 'BUGGER', + }, + 'playlist_count': 18, + }, { + # single video embedded with data-playable containing vpid + 'url': 'http://www.bbc.com/news/world-europe-32041533', + 'info_dict': { + 'id': 'p02mprgb', + 'ext': 'mp4', + 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV', + 'description': 'md5:2868290467291b37feda7863f7a83f54', + 'duration': 47, + 'timestamp': 1427219242, + 'upload_date': '20150324', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + # article with single video embedded with data-playable containing XML playlist + # with direct video links as progressiveDownloadUrl (for now these are extracted) + # and 
playlist with f4m and m3u8 as streamingUrl + 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu', + 'info_dict': { + 'id': '150615_telabyad_kentin_cogu', + 'ext': 'mp4', + 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde", + 'description': 'md5:33a4805a855c9baf7115fcbde57e7025', + 'timestamp': 1434397334, + 'upload_date': '20150615', + }, + 'params': { + 'skip_download': True, + } + }, { + # single video embedded with data-playable containing XML playlists (regional section) + 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw', + 'info_dict': { + 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw', + 'ext': 'mp4', + 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción', + 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8', + 'timestamp': 1434713142, + 'upload_date': '20150619', + }, + 'params': { + 'skip_download': True, + } + }, { + # single video from video playlist embedded with vxp-playlist-data JSON + 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376', + 'info_dict': { + 'id': 'p02w6qjc', + 'ext': 'mp4', + 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''', + 'duration': 56, + 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''', + }, + 'params': { + 'skip_download': True, + } + }, { + # single video story with digitalData + 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret', + 'info_dict': { + 'id': 'p02q6gc4', + 'ext': 'flv', + 'title': 'Sri Lanka’s spicy secret', + 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.', + 'timestamp': 1437674293, + 'upload_date': '20150723', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + # single video story without digitalData + 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star', + 'info_dict': { + 'id': 'p018zqqg', + 'ext': 'mp4', + 'title': 'Hyundai Santa Fe Sport: Rock star', + 'description': 'md5:b042a26142c4154a6e472933cf20793d', + 'timestamp': 1415867444, + 'upload_date': '20141113', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + # single video embedded with Morph + 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975', + 'info_dict': { + 'id': 'p041vhd0', + 'ext': 'mp4', + 'title': "Nigeria v Japan - Men's First Round", + 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.', + 'duration': 7980, + 'uploader': 'BBC Sport', + 'uploader_id': 'bbc_sport', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'Georestricted to UK', + }, { + # single video with playlist.sxml URL in playlist param + 'url': 'http://www.bbc.com/sport/0/football/33653409', + 'info_dict': { + 'id': 'p02xycnp', + 'ext': 'mp4', + 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?', + 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.', + 'duration': 140, + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + # article with multiple videos embedded with playlist.sxml in playlist param + 'url': 'http://www.bbc.com/sport/0/football/34475836', + 'info_dict': { + 'id': '34475836', + 'title': 'Jurgen Klopp: Furious football from a witty and winning 
coach', + 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.', + }, + 'playlist_count': 3, + }, { + # school report article with single video + 'url': 'http://www.bbc.co.uk/schoolreport/35744779', + 'info_dict': { + 'id': '35744779', + 'title': 'School which breaks down barriers in Jerusalem', + }, + 'playlist_count': 1, + }, { + # single video with playlist URL from weather section + 'url': 'http://www.bbc.com/weather/features/33601775', + 'only_matching': True, + }, { + # custom redirection to www.bbc.com + 'url': 'http://www.bbc.co.uk/news/science-environment-33661876', + 'only_matching': True, + }, { + # single video article embedded with data-media-vpid + 'url': 'http://www.bbc.co.uk/sport/rowing/35908187', + 'only_matching': True, + }, { + 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1', + 'info_dict': { + 'id': 'p06556y7', + 'ext': 'mp4', + 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?', + 'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd', + }, + 'params': { + 'skip_download': True, + } + }, { + # window.__PRELOADED_STATE__ + 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl', + 'info_dict': { + 'id': 'b0b9z4vz', + 'ext': 'mp4', + 'title': 'Prom 6: An American in Paris and Turangalila', + 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8', + 'uploader': 'Radio 3', + 'uploader_id': 'bbc_radio_three', + }, + }, { + 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227', + 'info_dict': { + 'id': 'p06w9tws', + 'ext': 'mp4', + 'title': 'md5:2fabf12a726603193a2879a055f72514', + 'description': 'Learn English words and phrases from this story', + }, + 'add_ie': [BBCCoUkIE.ie_key()], + }] + + @classmethod + def suitable(cls, url): + EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE) + return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) + else super(BBCIE, cls).suitable(url)) + + def _extract_from_media_meta(self, media_meta, video_id): + # Direct links to media in media metadata (e.g. 
+ # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu) + # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml + source_files = media_meta.get('sourceFiles') + if source_files: + return [{ + 'url': f['url'], + 'format_id': format_id, + 'ext': f.get('encoding'), + 'tbr': float_or_none(f.get('bitrate'), 1000), + 'filesize': int_or_none(f.get('filesize')), + } for format_id, f in source_files.items() if f.get('url')], [] + + programme_id = media_meta.get('externalId') + if programme_id: + return self._download_media_selector(programme_id) + + # Process playlist.sxml as legacy playlist + href = media_meta.get('href') + if href: + playlist = self._download_legacy_playlist_url(href) + _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id) + return formats, subtitles + + return [], [] + + def _extract_from_playlist_sxml(self, url, playlist_id, timestamp): + programme_id, title, description, duration, formats, subtitles = \ + self._process_legacy_playlist_url(url, playlist_id) + self._sort_formats(formats) + return { + 'id': programme_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + 'subtitles': subtitles, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + json_ld_info = self._search_json_ld(webpage, playlist_id, default={}) + timestamp = json_ld_info.get('timestamp') + + playlist_title = json_ld_info.get('title') + if not playlist_title: + playlist_title = self._og_search_title( + webpage, default=None) or self._html_search_regex( + r'<title>(.+?)', webpage, 'playlist title', default=None) + if playlist_title: + playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip() + + playlist_description = json_ld_info.get( + 'description') or self._og_search_description(webpage, default=None) + + if not timestamp: + timestamp = parse_iso8601(self._search_regex( + [r']+property="article:published_time"[^>]+content="([^"]+)"', + r'itemprop="datePublished"[^>]+datetime="([^"]+)"', + r'"datePublished":\s*"([^"]+)'], + webpage, 'date', default=None)) + + entries = [] + + # article with multiple videos embedded with playlist.sxml (e.g. + # http://www.bbc.com/sport/0/football/34475836) + playlists = re.findall(r']+name="playlist"[^>]+value="([^"]+)"', webpage) + playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage)) + if playlists: + entries = [ + self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp) + for playlist_url in playlists] + + # news article with multiple videos embedded with data-playable + data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage) + if data_playables: + for _, data_playable_json in data_playables: + data_playable = self._parse_json( + unescapeHTML(data_playable_json), playlist_id, fatal=False) + if not data_playable: + continue + settings = data_playable.get('settings', {}) + if settings: + # data-playable with video vpid in settings.playlistObject.items (e.g. 
+ # http://www.bbc.com/news/world-us-canada-34473351) + playlist_object = settings.get('playlistObject', {}) + if playlist_object: + items = playlist_object.get('items') + if items and isinstance(items, list): + title = playlist_object['title'] + description = playlist_object.get('summary') + duration = int_or_none(items[0].get('duration')) + programme_id = items[0].get('vpid') + formats, subtitles = self._download_media_selector(programme_id) + self._sort_formats(formats) + entries.append({ + 'id': programme_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + }) + else: + # data-playable without vpid but with a playlist.sxml URLs + # in otherSettings.playlist (e.g. + # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani) + playlist = data_playable.get('otherSettings', {}).get('playlist', {}) + if playlist: + entry = None + for key in ('streaming', 'progressiveDownload'): + playlist_url = playlist.get('%sUrl' % key) + if not playlist_url: + continue + try: + info = self._extract_from_playlist_sxml( + playlist_url, playlist_id, timestamp) + if not entry: + entry = info + else: + entry['title'] = info['title'] + entry['formats'].extend(info['formats']) + except Exception as e: + # Some playlist URL may fail with 500, at the same time + # the other one may work fine (e.g. + # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu) + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: + continue + raise + if entry: + self._sort_formats(entry['formats']) + entries.append(entry) + + if entries: + return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) + + # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227 + group_id = self._search_regex( + r']+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, + webpage, 'group id', default=None) + if playlist_id: + return self.url_result( + 'https://www.bbc.co.uk/programmes/%s' % group_id, + ie=BBCCoUkIE.ie_key()) + + # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret) + programme_id = self._search_regex( + [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX, + r']+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX, + r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX], + webpage, 'vpid', default=None) + + if programme_id: + formats, subtitles = self._download_media_selector(programme_id) + self._sort_formats(formats) + # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star) + digital_data = self._parse_json( + self._search_regex( + r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'), + programme_id, fatal=False) + page_info = digital_data.get('page', {}).get('pageInfo', {}) + title = page_info.get('pageName') or self._og_search_title(webpage) + description = page_info.get('description') or self._og_search_description(webpage) + timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp + return { + 'id': programme_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + 'subtitles': subtitles, + } + + # Morph based embed (e.g. 
http://www.bbc.co.uk/sport/live/olympics/36895975) + # There are several setPayload calls may be present but the video + # seems to be always related to the first one + morph_payload = self._parse_json( + self._search_regex( + r'Morph\.setPayload\([^,]+,\s*({.+?})\);', + webpage, 'morph payload', default='{}'), + playlist_id, fatal=False) + if morph_payload: + components = try_get(morph_payload, lambda x: x['body']['components'], list) or [] + for component in components: + if not isinstance(component, dict): + continue + lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict) + if not lead_media: + continue + identifiers = lead_media.get('identifiers') + if not identifiers or not isinstance(identifiers, dict): + continue + programme_id = identifiers.get('vpid') or identifiers.get('playablePid') + if not programme_id: + continue + title = lead_media.get('title') or self._og_search_title(webpage) + formats, subtitles = self._download_media_selector(programme_id) + self._sort_formats(formats) + description = lead_media.get('summary') + uploader = lead_media.get('masterBrand') + uploader_id = lead_media.get('mid') + duration = None + duration_d = lead_media.get('duration') + if isinstance(duration_d, dict): + duration = parse_duration(dict_get( + duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration'))) + return { + 'id': programme_id, + 'title': title, + 'description': description, + 'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, + 'subtitles': subtitles, + } + + preload_state = self._parse_json(self._search_regex( + r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage, + 'preload state', default='{}'), playlist_id, fatal=False) + if preload_state: + current_programme = preload_state.get('programmes', {}).get('current') or {} + programme_id = current_programme.get('id') + if current_programme and programme_id and current_programme.get('type') == 'playable_item': + title = current_programme.get('titles', {}).get('tertiary') or playlist_title + formats, subtitles = self._download_media_selector(programme_id) + self._sort_formats(formats) + synopses = current_programme.get('synopses') or {} + network = current_programme.get('network') or {} + duration = int_or_none( + current_programme.get('duration', {}).get('value')) + thumbnail = None + image_url = current_programme.get('image_url') + if image_url: + thumbnail = image_url.replace('{recipe}', '1920x1920') + return { + 'id': programme_id, + 'title': title, + 'description': dict_get(synopses, ('long', 'medium', 'short')), + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': network.get('short_title'), + 'uploader_id': network.get('id'), + 'formats': formats, + 'subtitles': subtitles, + } + + bbc3_config = self._parse_json( + self._search_regex( + r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage, + 'bbcthree config', default='{}'), + playlist_id, transform_source=js_to_json, fatal=False) + if bbc3_config: + bbc3_playlist = try_get( + bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'], + dict) + if bbc3_playlist: + playlist_title = bbc3_playlist.get('title') or playlist_title + thumbnail = bbc3_playlist.get('holdingImageURL') + entries = [] + for bbc3_item in bbc3_playlist['items']: + programme_id = bbc3_item.get('versionID') + if not programme_id: + continue + formats, subtitles = self._download_media_selector(programme_id) + self._sort_formats(formats) + entries.append({ + 'id': programme_id, + 'title': playlist_title, + 
'thumbnail': thumbnail, + 'timestamp': timestamp, + 'formats': formats, + 'subtitles': subtitles, + }) + return self.playlist_result( + entries, playlist_id, playlist_title, playlist_description) + + def extract_all(pattern): + return list(filter(None, map( + lambda s: self._parse_json(s, playlist_id, fatal=False), + re.findall(pattern, webpage)))) + + # Multiple video article (e.g. + # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460) + EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX + entries = [] + for match in extract_all(r'new\s+SMP\(({.+?})\)'): + embed_url = match.get('playerSettings', {}).get('externalEmbedUrl') + if embed_url and re.match(EMBED_URL, embed_url): + entries.append(embed_url) + entries.extend(re.findall( + r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage)) + if entries: + return self.playlist_result( + [self.url_result(entry_, 'BBCCoUk') for entry_ in entries], + playlist_id, playlist_title, playlist_description) + + # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511) + medias = extract_all(r"data-media-meta='({[^']+})'") + + if not medias: + # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international) + media_asset = self._search_regex( + r'mediaAssetPage\.init\(\s*({.+?}), "/', + webpage, 'media asset', default=None) + if media_asset: + media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False) + medias = [] + for video in media_asset_page.get('videos', {}).values(): + medias.extend(video.values()) + + if not medias: + # Multiple video playlist with single `now playing` entry (e.g. + # http://www.bbc.com/news/video_and_audio/must_see/33767813) + vxp_playlist = self._parse_json( + self._search_regex( + r']+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)', + webpage, 'playlist data'), + playlist_id) + playlist_medias = [] + for item in vxp_playlist: + media = item.get('media') + if not media: + continue + playlist_medias.append(media) + # Download single video if found media with asset id matching the video id from URL + if item.get('advert', {}).get('assetId') == playlist_id: + medias = [media] + break + # Fallback to the whole playlist + if not medias: + medias = playlist_medias + + entries = [] + for num, media_meta in enumerate(medias, start=1): + formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id) + if not formats: + continue + self._sort_formats(formats) + + video_id = media_meta.get('externalId') + if not video_id: + video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num) + + title = media_meta.get('caption') + if not title: + title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num) + + duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration')) + + images = [] + for image in media_meta.get('images', {}).values(): + images.extend(image.values()) + if 'image' in media_meta: + images.append(media_meta['image']) + + thumbnails = [{ + 'url': image.get('href'), + 'width': int_or_none(image.get('width')), + 'height': int_or_none(image.get('height')), + } for image in images] + + entries.append({ + 'id': video_id, + 'title': title, + 'thumbnails': thumbnails, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + 'subtitles': subtitles, + }) + + return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) + + +class BBCCoUkArticleIE(InfoExtractor): + 
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P[a-zA-Z0-9]+)' + IE_NAME = 'bbc.co.uk:article' + IE_DESC = 'BBC articles' + + _TEST = { + 'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer', + 'info_dict': { + 'id': '3jNQLTMrPlYGTBn0WV6M2MS', + 'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four', + 'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.', + }, + 'playlist_count': 4, + 'add_ie': ['BBCCoUk'], + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage).strip() + + entries = [self.url_result(programme_url) for programme_url in re.findall( + r']+typeof="Clip"[^>]+resource="([^"]+)"', webpage)] + + return self.playlist_result(entries, playlist_id, title, description) + + +class BBCCoUkPlaylistBaseIE(InfoExtractor): + def _entries(self, webpage, url, playlist_id): + single_page = 'page' in compat_urlparse.parse_qs( + compat_urlparse.urlparse(url).query) + for page_num in itertools.count(2): + for video_id in re.findall( + self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage): + yield self.url_result( + self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) + if single_page: + return + next_page = self._search_regex( + r']+class=(["\'])pagination_+next\1[^>]*>]+href=(["\'])(?P(?:(?!\2).)+)\2', + webpage, 'next page url', default=None, group='url') + if not next_page: + break + webpage = self._download_webpage( + compat_urlparse.urljoin(url, next_page), playlist_id, + 'Downloading page %d' % page_num, page_num) + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + title, description = self._extract_title_and_description(webpage) + + return self.playlist_result( + self._entries(webpage, url, playlist_id), + playlist_id, title, description) + + +class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): + IE_NAME = 'bbc.co.uk:iplayer:playlist' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P%s)' % BBCCoUkIE._ID_REGEX + _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' + _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)' + _TESTS = [{ + 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', + 'info_dict': { + 'id': 'b05rcz9v', + 'title': 'The Disappearance', + 'description': 'French thriller serial about a missing teenager.', + }, + 'playlist_mincount': 6, + 'skip': 'This programme is not currently available on BBC iPlayer', + }, { + # Available for over a year unlike 30 days for most other programmes + 'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32', + 'info_dict': { + 'id': 'p02tcc32', + 'title': 'Bohemian Icons', + 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7', + }, + 'playlist_mincount': 10, + }] + + def _extract_title_and_description(self, webpage): + title = self._search_regex(r'
<h1>([^<]+)</h1>', webpage, 'title', fatal=False) + description = self._search_regex( + r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>
    ', + webpage, 'description', fatal=False, group='value') + return title, description + + +class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): + IE_NAME = 'bbc.co.uk:playlist' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX + _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s' + _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)' + _TESTS = [{ + 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips', + 'info_dict': { + 'id': 'b05rcz9v', + 'title': 'The Disappearance - Clips - BBC Four', + 'description': 'French thriller serial about a missing teenager.', + }, + 'playlist_mincount': 7, + }, { + # multipage playlist, explicit page + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Frozen Planet - Clips - BBC One', + 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c', + }, + 'playlist_mincount': 24, + }, { + # multipage playlist, all pages + 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips', + 'info_dict': { + 'id': 'b00mfl7n', + 'title': 'Frozen Planet - Clips - BBC One', + 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c', + }, + 'playlist_mincount': 142, + }, { + 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06', + 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips', + 'only_matching': True, + }, { + 'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player', + 'only_matching': True, + }] + + def _extract_title_and_description(self, webpage): + title = self._og_search_title(webpage, fatal=False) + description = self._og_search_description(webpage) + return title, description diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py new file mode 100644 index 000000000..e264a145f --- /dev/null +++ b/youtube_dl/extractor/beampro.py @@ -0,0 +1,191 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + compat_str, + float_or_none, + int_or_none, + parse_iso8601, + try_get, + urljoin, +) + + +class BeamProBaseIE(InfoExtractor): + _API_BASE = 'https://mixer.com/api/v1' + _RATINGS = {'family': 0, 'teen': 13, '18+': 18} + + def _extract_channel_info(self, chan): + user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id']) + return { + 'uploader': chan.get('token') or try_get( + chan, lambda x: x['user']['username'], compat_str), + 'uploader_id': compat_str(user_id) if user_id else None, + 'age_limit': self._RATINGS.get(chan.get('audience')), + } + + +class BeamProLiveIE(BeamProBaseIE): + IE_NAME = 'Mixer:live' + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P[^/?#&]+)' + _TEST = { + 'url': 'http://mixer.com/niterhayven', + 'info_dict': { + 'id': '261562', + 'ext': 'mp4', + 'title': 'Introducing The Witcher 3 // The Grind Starts Now!', + 'description': 'md5:0b161ac080f15fe05d18a07adb44a74d', + 'thumbnail': r're:https://.*\.jpg$', + 'timestamp': 1483477281, + 'upload_date': '20170103', + 'uploader': 'niterhayven', + 'uploader_id': '373396', + 'age_limit': 18, + 'is_live': True, + 'view_count': int, + }, + 'skip': 'niterhayven is offline', + 'params': { + 'skip_download': True, + }, + } + + _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE + + @classmethod + def suitable(cls, url): + return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url) + + def _real_extract(self, url): + channel_name = 
self._match_id(url) + + chan = self._download_json( + '%s/channels/%s' % (self._API_BASE, channel_name), channel_name) + + if chan.get('online') is False: + raise ExtractorError( + '{0} is offline'.format(channel_name), expected=True) + + channel_id = chan['id'] + + def manifest_url(kind): + return self._MANIFEST_URL_TEMPLATE % (channel_id, kind) + + formats = self._extract_m3u8_formats( + manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls', + fatal=False) + formats.extend(self._extract_smil_formats( + manifest_url('smil'), channel_name, fatal=False)) + self._sort_formats(formats) + + info = { + 'id': compat_str(chan.get('id') or channel_name), + 'title': self._live_title(chan.get('name') or channel_name), + 'description': clean_html(chan.get('description')), + 'thumbnail': try_get( + chan, lambda x: x['thumbnail']['url'], compat_str), + 'timestamp': parse_iso8601(chan.get('updatedAt')), + 'is_live': True, + 'view_count': int_or_none(chan.get('viewersTotal')), + 'formats': formats, + } + info.update(self._extract_channel_info(chan)) + + return info + + +class BeamProVodIE(BeamProBaseIE): + IE_NAME = 'Mixer:vod' + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P\w+)' + _TESTS = [{ + 'url': 'https://mixer.com/willow8714?vod=2259830', + 'md5': 'b2431e6e8347dc92ebafb565d368b76b', + 'info_dict': { + 'id': '2259830', + 'ext': 'mp4', + 'title': 'willow8714\'s Channel', + 'duration': 6828.15, + 'thumbnail': r're:https://.*source\.png$', + 'timestamp': 1494046474, + 'upload_date': '20170506', + 'uploader': 'willow8714', + 'uploader_id': '6085379', + 'age_limit': 13, + 'view_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', + 'only_matching': True, + }] + + @staticmethod + def _extract_format(vod, vod_type): + if not vod.get('baseUrl'): + return [] + + if vod_type == 'hls': + filename, protocol = 'manifest.m3u8', 'm3u8_native' + elif vod_type == 'raw': + filename, protocol = 'source.mp4', 'https' + else: + assert False + + data = vod.get('data') if isinstance(vod.get('data'), dict) else {} + + format_id = [vod_type] + if isinstance(data.get('Height'), compat_str): + format_id.append('%sp' % data['Height']) + + return [{ + 'url': urljoin(vod['baseUrl'], filename), + 'format_id': '-'.join(format_id), + 'ext': 'mp4', + 'protocol': protocol, + 'width': int_or_none(data.get('Width')), + 'height': int_or_none(data.get('Height')), + 'fps': int_or_none(data.get('Fps')), + 'tbr': int_or_none(data.get('Bitrate'), 1000), + }] + + def _real_extract(self, url): + vod_id = self._match_id(url) + + vod_info = self._download_json( + '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id) + + state = vod_info.get('state') + if state != 'AVAILABLE': + raise ExtractorError( + 'VOD %s is not available (state: %s)' % (vod_id, state), + expected=True) + + formats = [] + thumbnail_url = None + + for vod in vod_info['vods']: + vod_type = vod.get('format') + if vod_type in ('hls', 'raw'): + formats.extend(self._extract_format(vod, vod_type)) + elif vod_type == 'thumbnail': + thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png') + + self._sort_formats(formats) + + info = { + 'id': vod_id, + 'title': vod_info.get('name') or vod_id, + 'duration': float_or_none(vod_info.get('duration')), + 'thumbnail': thumbnail_url, + 'timestamp': parse_iso8601(vod_info.get('createdAt')), + 'view_count': int_or_none(vod_info.get('viewsTotal')), + 'formats': formats, + } + 
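Stepping back to `BeamProLiveIE` above: `_MANIFEST_URL_TEMPLATE` goes through two rounds of `%`-formatting, which is why the channel and kind placeholders are written as `%%s`. Roughly:

    API_BASE = 'https://mixer.com/api/v1'  # BeamProBaseIE._API_BASE

    # First interpolation happens when the class attribute is defined:
    template = '%s/channels/%%s/manifest.%%s' % API_BASE
    # template == 'https://mixer.com/api/v1/channels/%s/manifest.%s'

    # Second interpolation happens per request; 261562 is the channel id
    # from the test above.
    print(template % (261562, 'm3u8'))
    # -> https://mixer.com/api/v1/channels/261562/manifest.m3u8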
info.update(self._extract_channel_info(vod_info.get('channel') or {})) + + return info diff --git a/youtube_dl/extractor/beatport.py b/youtube_dl/extractor/beatport.py new file mode 100644 index 000000000..e60709417 --- /dev/null +++ b/youtube_dl/extractor/beatport.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class BeatportIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|pro\.)?beatport\.com/track/(?P[^/]+)/(?P[0-9]+)' + _TESTS = [{ + 'url': 'https://beatport.com/track/synesthesia-original-mix/5379371', + 'md5': 'b3c34d8639a2f6a7f734382358478887', + 'info_dict': { + 'id': '5379371', + 'display_id': 'synesthesia-original-mix', + 'ext': 'mp4', + 'title': 'Froxic - Synesthesia (Original Mix)', + }, + }, { + 'url': 'https://beatport.com/track/love-and-war-original-mix/3756896', + 'md5': 'e44c3025dfa38c6577fbaeb43da43514', + 'info_dict': { + 'id': '3756896', + 'display_id': 'love-and-war-original-mix', + 'ext': 'mp3', + 'title': 'Wolfgang Gartner - Love & War (Original Mix)', + }, + }, { + 'url': 'https://beatport.com/track/birds-original-mix/4991738', + 'md5': 'a1fd8e8046de3950fd039304c186c05f', + 'info_dict': { + 'id': '4991738', + 'display_id': 'birds-original-mix', + 'ext': 'mp4', + 'title': "Tos, Middle Milk, Mumblin' Johnsson - Birds (Original Mix)", + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + track_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) + + playables = self._parse_json( + self._search_regex( + r'window\.Playables\s*=\s*({.+?});', webpage, + 'playables info', flags=re.DOTALL), + track_id) + + track = next(t for t in playables['tracks'] if t['id'] == int(track_id)) + + title = ', '.join((a['name'] for a in track['artists'])) + ' - ' + track['name'] + if track['mix']: + title += ' (' + track['mix'] + ')' + + formats = [] + for ext, info in track['preview'].items(): + if not info['url']: + continue + fmt = { + 'url': info['url'], + 'ext': ext, + 'format_id': ext, + 'vcodec': 'none', + } + if ext == 'mp3': + fmt['preference'] = 0 + fmt['acodec'] = 'mp3' + fmt['abr'] = 96 + fmt['asr'] = 44100 + elif ext == 'mp4': + fmt['preference'] = 1 + fmt['acodec'] = 'aac' + fmt['abr'] = 96 + fmt['asr'] = 44100 + formats.append(fmt) + self._sort_formats(formats) + + images = [] + for name, info in track['images'].items(): + image_url = info.get('url') + if name == 'dynamic' or not image_url: + continue + image = { + 'id': name, + 'url': image_url, + 'height': int_or_none(info.get('height')), + 'width': int_or_none(info.get('width')), + } + images.append(image) + + return { + 'id': compat_str(track.get('id')) or track_id, + 'display_id': track.get('slug') or display_id, + 'title': title, + 'formats': formats, + 'thumbnails': images, + } diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py new file mode 100644 index 000000000..c15a0ac8f --- /dev/null +++ b/youtube_dl/extractor/beeg.py @@ -0,0 +1,109 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + int_or_none, + unified_timestamp, +) + + +class BeegIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P\d+)' + _TESTS = [{ + # api/v6 v1 + 'url': 'http://beeg.com/5416503', + 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820', + 
'info_dict': { + 'id': '5416503', + 'ext': 'mp4', + 'title': 'Sultry Striptease', + 'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2', + 'timestamp': 1391813355, + 'upload_date': '20140207', + 'duration': 383, + 'tags': list, + 'age_limit': 18, + } + }, { + # api/v6 v2 + 'url': 'https://beeg.com/1941093077?t=911-1391', + 'only_matching': True, + }, { + 'url': 'https://beeg.porn/video/5416503', + 'only_matching': True, + }, { + 'url': 'https://beeg.porn/5416503', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + beeg_version = self._search_regex( + r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', + default='1546225636701') + + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query = { + 'v': 2, + 's': t[0], + 'e': t[1], + } + else: + query = {'v': 1} + + for api_path in ('', 'api.'): + video = self._download_json( + 'https://%sbeeg.com/api/v6/%s/video/%s' + % (api_path, beeg_version, video_id), video_id, + fatal=api_path == 'api.', query=query) + if video: + break + + formats = [] + for format_id, video_url in video.items(): + if not video_url: + continue + height = self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None) + if not height: + continue + formats.append({ + 'url': self._proto_relative_url( + video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'), + 'format_id': format_id, + 'height': int(height), + }) + self._sort_formats(formats) + + title = video['title'] + video_id = compat_str(video.get('id') or video_id) + display_id = video.get('code') + description = video.get('desc') + series = video.get('ps_name') + + timestamp = unified_timestamp(video.get('date')) + duration = int_or_none(video.get('duration')) + + tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'series': series, + 'timestamp': timestamp, + 'duration': duration, + 'tags': tags, + 'formats': formats, + 'age_limit': self._rta_search(webpage), + } diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dl/extractor/behindkink.py new file mode 100644 index 000000000..9bca853b3 --- /dev/null +++ b/youtube_dl/extractor/behindkink.py @@ -0,0 +1,46 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import url_basename + + +class BehindKinkIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?behindkink\.com/(?P[0-9]{4})/(?P[0-9]{2})/(?P[0-9]{2})/(?P[^/#?_]+)' + _TEST = { + 'url': 'http://www.behindkink.com/2014/12/05/what-are-you-passionate-about-marley-blaze/', + 'md5': '507b57d8fdcd75a41a9a7bdb7989c762', + 'info_dict': { + 'id': '37127', + 'ext': 'mp4', + 'title': 'What are you passionate about – Marley Blaze', + 'description': 'md5:aee8e9611b4ff70186f752975d9b94b4', + 'upload_date': '20141205', + 'thumbnail': 'http://www.behindkink.com/wp-content/uploads/2014/12/blaze-1.jpg', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + video_url = self._search_regex( + r' + (?: + ctv| + tsn| + bnn(?:bloomberg)?| + thecomedynetwork| + discovery| + discoveryvelocity| + sciencechannel| + investigationdiscovery| + animalplanet| + bravo| + 
mtv| + space| + etalk + )\.ca| + much\.com + )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P[0-9]{6,})''' + _TESTS = [{ + 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070', + 'md5': '36d3ef559cfe8af8efe15922cd3ce950', + 'info_dict': { + 'id': '1403070', + 'ext': 'flv', + 'title': 'David Cockfield\'s Top Picks', + 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3', + 'upload_date': '20180525', + 'timestamp': 1527288600, + }, + }, { + 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582', + 'only_matching': True, + }, { + 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549', + 'only_matching': True, + }, { + 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654', + 'only_matching': True, + }, { + 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009', + 'only_matching': True, + }, { + 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016', + 'only_matching': True, + }, { + 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6', + 'only_matching': True, + }, { + 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430', + 'only_matching': True, + }, { + 'url': 'http://www.etalk.ca/video?videoid=663455', + 'only_matching': True, + }] + _DOMAINS = { + 'thecomedynetwork': 'comedy', + 'discoveryvelocity': 'discvel', + 'sciencechannel': 'discsci', + 'investigationdiscovery': 'invdisc', + 'animalplanet': 'aniplan', + 'etalk': 'ctv', + 'bnnbloomberg': 'bnn', + } + + def _real_extract(self, url): + domain, video_id = re.match(self._VALID_URL, url).groups() + domain = domain.split('.')[0] + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id), + 'ie_key': 'NineCNineMedia', + } diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py new file mode 100644 index 000000000..d7ceaa85e --- /dev/null +++ b/youtube_dl/extractor/bet.py @@ -0,0 +1,80 @@ +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor +from ..utils import unified_strdate + + +class BetIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P.+?)\.html' + _TESTS = [ + { + 'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', + 'info_dict': { + 'id': '07e96bd3-8850-3051-b856-271b457f0ab8', + 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', + 'ext': 'flv', + 'title': 'A Conversation With President Obama', + 'description': 'President Obama urges persistence in confronting racism and bias.', + 'duration': 1534, + 'upload_date': '20141208', + 'thumbnail': r're:(?i)^https?://.*\.jpg$', + 'subtitles': { + 'en': 'mincount:2', + } + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', + 'info_dict': { + 'id': '9f516bf1-7543-39c4-8076-dd441b459ba9', + 'display_id': 'justice-for-ferguson-a-community-reacts', + 'ext': 'flv', + 'title': 'Justice for Ferguson: A Community Reacts', + 'description': 'A BET News special.', + 'duration': 1696, + 'upload_date': '20141125', + 'thumbnail': r're:(?i)^https?://.*\.jpg$', + 'subtitles': { + 'en': 'mincount:2', + } + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + ] + + _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" + + 
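Looking back at `BellMediaIE._real_extract`: the first label of the matched domain is looked up in `_DOMAINS` to obtain the 9c9media destination code, and labels missing from the table map to themselves. A small standalone sketch of that mapping (the table is copied from the extractor above):

    DOMAINS = {
        'thecomedynetwork': 'comedy',
        'discoveryvelocity': 'discvel',
        'sciencechannel': 'discsci',
        'investigationdiscovery': 'invdisc',
        'animalplanet': 'aniplan',
        'etalk': 'ctv',
        'bnnbloomberg': 'bnn',
    }

    def nine_c_nine_media_url(domain, video_id):
        domain = domain.split('.')[0]
        return '9c9media:%s_web:%s' % (DOMAINS.get(domain, domain), video_id)

    print(nine_c_nine_media_url('bnnbloomberg.ca', '1403070'))
    # -> 9c9media:bnn_web:1403070
    print(nine_c_nine_media_url('tsn.ca', '939549'))
    # -> 9c9media:tsn_web:939549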
def _get_feed_query(self, uri): + return { + 'uuid': uri, + } + + def _extract_mgid(self, webpage): + return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + mgid = self._extract_mgid(webpage) + videos_info = self._get_videos_info(mgid) + + info_dict = videos_info['entries'][0] + + upload_date = unified_strdate(self._html_search_meta('date', webpage)) + description = self._html_search_meta('description', webpage) + + info_dict.update({ + 'display_id': display_id, + 'description': description, + 'upload_date': upload_date, + }) + + return info_dict diff --git a/youtube_dl/extractor/bfi.py b/youtube_dl/extractor/bfi.py new file mode 100644 index 000000000..60c8944b5 --- /dev/null +++ b/youtube_dl/extractor/bfi.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import extract_attributes + + +class BFIPlayerIE(InfoExtractor): + IE_NAME = 'bfi:player' + _VALID_URL = r'https?://player\.bfi\.org\.uk/[^/]+/film/watch-(?P[\w-]+)-online' + _TEST = { + 'url': 'https://player.bfi.org.uk/free/film/watch-computer-doctor-1974-online', + 'md5': 'e8783ebd8e061ec4bc6e9501ed547de8', + 'info_dict': { + 'id': 'htNnhlZjE60C9VySkQEIBtU-cNV1Xx63', + 'ext': 'mp4', + 'title': 'Computer Doctor', + 'description': 'md5:fb6c240d40c4dbe40428bdd62f78203b', + }, + 'skip': 'BFI Player films cannot be played outside of the UK', + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + entries = [] + for player_el in re.findall(r'(?s)<[^>]+class="player"[^>]*>', webpage): + player_attr = extract_attributes(player_el) + ooyala_id = player_attr.get('data-video-id') + if not ooyala_id: + continue + entries.append(self.url_result( + 'ooyala:' + ooyala_id, 'Ooyala', + ooyala_id, player_attr.get('data-label'))) + return self.playlist_result(entries) diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py new file mode 100644 index 000000000..28e3e59f6 --- /dev/null +++ b/youtube_dl/extractor/bigflix.py @@ -0,0 +1,78 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, +) + + +class BigflixIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.+/(?P[0-9]+)' + _TESTS = [{ + # 2 formats + 'url': 'http://www.bigflix.com/Tamil-movies/Drama-movies/Madarasapatinam/16070', + 'info_dict': { + 'id': '16070', + 'ext': 'mp4', + 'title': 'Madarasapatinam', + 'description': 'md5:9f0470b26a4ba8e824c823b5d95c2f6b', + 'formats': 'mincount:2', + }, + 'params': { + 'skip_download': True, + } + }, { + # multiple formats + 'url': 'http://www.bigflix.com/Malayalam-movies/Drama-movies/Indian-Rupee/15967', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r']+class=["\']pagetitle["\'][^>]*>(.+?)', + webpage, 'title') + + def decode_url(quoted_b64_url): + return compat_b64decode(compat_urllib_parse_unquote( + quoted_b64_url)).decode('utf-8') + + formats = [] + for height, encoded_url in re.findall( + r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage): + video_url = decode_url(encoded_url) + f = { + 'url': video_url, + 'format_id': '%sp' % height, + 'height': 
+        formats = []
+        for height, encoded_url in re.findall(
+                r'ContentURL_(\d{3,4})[pP][^=]+=([^&]+)', webpage):
+            video_url = decode_url(encoded_url)
+            f = {
+                'url': video_url,
+                'format_id': '%sp' % height,
+                'height': int(height),
+            }
+            if video_url.startswith('rtmp'):
+                f['ext'] = 'flv'
+            formats.append(f)
+
+        file_url = self._search_regex(
+            r'file=([^&]+)', webpage, 'video url', default=None)
+        if file_url:
+            video_url = decode_url(file_url)
+            if all(f['url'] != video_url for f in formats):
+                formats.append({
+                    'url': decode_url(file_url),
+                })
+
+        self._sort_formats(formats)
+
+        description = self._html_search_meta('description', webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'formats': formats
+        }
diff --git a/youtube_dl/extractor/bild.py b/youtube_dl/extractor/bild.py
new file mode 100644
index 000000000..b8dfbd42b
--- /dev/null
+++ b/youtube_dl/extractor/bild.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    int_or_none,
+    unescapeHTML,
+)
+
+
+class BildIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bild\.de/(?:[^/]+/)+(?P<display_id>[^/]+)-(?P<id>\d+)(?:,auto=true)?\.bild\.html'
+    IE_DESC = 'Bild.de'
+    _TEST = {
+        'url': 'http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146.bild.html',
+        'md5': 'dd495cbd99f2413502a1713a1156ac8a',
+        'info_dict': {
+            'id': '38184146',
+            'ext': 'mp4',
+            'title': 'Das können die neuen iPads',
+            'description': 'md5:a4058c4fa2a804ab59c00d7244bbf62f',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 196,
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
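+        # Illustrative note (not part of the original patch): bild.de serves
+        # the clip metadata as JSON once the '.bild.html' suffix is replaced
+        # with ',view=json.bild.html', e.g. the test URL above becomes:
+        #
+        #   http://www.bild.de/video/clip/apple-ipad-air/das-koennen-die-neuen-ipads-38184146,view=json.bild.html
+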
+        video_data = self._download_json(
+            url.split('.bild.html')[0] + ',view=json.bild.html', video_id)
+
+        return {
+            'id': video_id,
+            'title': unescapeHTML(video_data['title']).strip(),
+            'description': unescapeHTML(video_data.get('description')),
+            'url': video_data['clipList'][0]['srces'][0]['src'],
+            'thumbnail': video_data.get('poster'),
+            'duration': int_or_none(video_data.get('durationSec')),
+        }
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
new file mode 100644
index 000000000..3746671d3
--- /dev/null
+++ b/youtube_dl/extractor/bilibili.py
@@ -0,0 +1,308 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urlparse,
+)
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    float_or_none,
+    parse_iso8601,
+    smuggle_url,
+    strip_jsonp,
+    unified_timestamp,
+    unsmuggle_url,
+    urlencode_postdata,
+)
+
+
+class BiliBiliIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'http://www.bilibili.tv/video/av1074402/',
+        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
+        'info_dict': {
+            'id': '1074402',
+            'ext': 'flv',
+            'title': '【金坷垃】金泡沫',
+            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
+            'duration': 308.067,
+            'timestamp': 1398012678,
+            'upload_date': '20140420',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'uploader': '菊子桑',
+            'uploader_id': '156160',
+        },
+    }, {
+        # Tested in BiliBiliBangumiIE
+        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
+        'only_matching': True,
+    }, {
+        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
+        'md5': '3f721ad1e75030cc06faf73587cfec57',
+        'info_dict': {
+            'id': '100643',
+            'ext': 'mp4',
+            'title': 'CHAOS;CHILD',
+            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
+        },
+        'skip': 'Geo-restricted to China',
+    }, {
+        # Title with double quotes
+        'url': 'http://www.bilibili.com/video/av8903802/',
+        'info_dict': {
+            'id': '8903802',
+            'title': '阿滴英文|英文歌分享#6 "Closer',
+            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': '8903802_part1',
+                'ext': 'flv',
+                'title': '阿滴英文|英文歌分享#6 "Closer',
+                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+                'uploader': '阿滴英文',
+                'uploader_id': '65880958',
+                'timestamp': 1488382634,
+                'upload_date': '20170301',
+            },
+            'params': {
+                'skip_download': True,  # Test metadata only
+            },
+        }, {
+            'info_dict': {
+                'id': '8903802_part2',
+                'ext': 'flv',
+                'title': '阿滴英文|英文歌分享#6 "Closer',
+                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+                'uploader': '阿滴英文',
+                'uploader_id': '65880958',
+                'timestamp': 1488382634,
+                'upload_date': '20170301',
+            },
+            'params': {
+                'skip_download': True,  # Test metadata only
+            },
+        }]
+    }]
+
+    _APP_KEY = 'iVGUTjsxvpLeuDCf'
+    _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
+
+    def _report_error(self, result):
+        if 'message' in result:
+            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
+        elif 'code' in result:
+            raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
+        else:
+            raise ExtractorError('Can\'t extract Bangumi episode ID')
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        anime_id = mobj.group('anime_id')
+        webpage = self._download_webpage(url, video_id)
+
+        if 'anime/' not in url:
+            cid = self._search_regex(
+                r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
+                default=None
+            ) or compat_parse_qs(self._search_regex(
+                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+                 r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
+                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
+                webpage, 'player parameters'))['cid'][0]
+        else:
+            if 'no_bangumi_tip' not in smuggled_data:
+                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
+                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
+            headers = {
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                'Referer': url
+            }
+            headers.update(self.geo_verification_headers())
+
+            js = self._download_json(
+                'http://bangumi.bilibili.com/web_api/get_source', video_id,
+                data=urlencode_postdata({'episode_id': video_id}),
+                headers=headers)
+            if 'result' not in js:
+                self._report_error(js)
+            cid = js['result']['cid']
+
+        headers = {
+            'Referer': url
+        }
+        headers.update(self.geo_verification_headers())
+
+        entries = []
+
+        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
+        for num, rendition in enumerate(RENDITIONS, start=1):
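+            # Illustrative note (not part of the original patch): the playurl
+            # API expects a signed query string; the payload is concatenated
+            # with the static client secret and hashed with MD5. Standalone
+            # equivalent of the two lines that follow:
+            #
+            #   import hashlib
+            #   payload = 'appkey=iVGUTjsxvpLeuDCf&cid=%s&otype=json&qn=80&quality=80&type=' % cid
+            #   sign = hashlib.md5((payload + 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt').encode('utf-8')).hexdigest()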
+            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
+            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
+
+            video_info = self._download_json(
+                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
+                video_id, note='Downloading video info page',
+                headers=headers, fatal=num == len(RENDITIONS))
+
+            if not video_info:
+                continue
+
+            if 'durl' not in video_info:
+                if num < len(RENDITIONS):
+                    continue
+                self._report_error(video_info)
+
+            for idx, durl in enumerate(video_info['durl']):
+                formats = [{
+                    'url': durl['url'],
+                    'filesize': int_or_none(durl['size']),
+                }]
+                for backup_url in durl.get('backup_url', []):
+                    formats.append({
+                        'url': backup_url,
+                        # backup URLs have lower priorities
+                        'preference': -2 if 'hd.mp4' in backup_url else -3,
+                    })
+
+                for a_format in formats:
+                    a_format.setdefault('http_headers', {}).update({
+                        'Referer': url,
+                    })
+
+                self._sort_formats(formats)
+
+                entries.append({
+                    'id': '%s_part%s' % (video_id, idx),
+                    'duration': float_or_none(durl.get('length'), 1000),
+                    'formats': formats,
+                })
+            break
+
+        title = self._html_search_regex(
+            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+             '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+            group='title')
+        description = self._html_search_meta('description', webpage)
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
+            default=None) or self._html_search_meta(
+            'uploadDate', webpage, 'timestamp', default=None))
+        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
+
+        # TODO 'view_count' requires deobfuscating Javascript
+        info = {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'thumbnail': thumbnail,
+            'duration': float_or_none(video_info.get('timelength'), scale=1000),
+        }
+
+        uploader_mobj = re.search(
+            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
+            webpage)
+        if uploader_mobj:
+            info.update({
+                'uploader': uploader_mobj.group('name'),
+                'uploader_id': uploader_mobj.group('id'),
+            })
+        if not info.get('uploader'):
+            info['uploader'] = self._html_search_meta(
+                'author', webpage, 'uploader', default=None)
+
+        for entry in entries:
+            entry.update(info)
+
+        if len(entries) == 1:
+            return entries[0]
+        else:
+            for idx, entry in enumerate(entries):
+                entry['id'] = '%s_part%d' % (video_id, (idx + 1))
+
+            return {
+                '_type': 'multi_video',
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'entries': entries,
+            }
+
+
+class BiliBiliBangumiIE(InfoExtractor):
+    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
+
+    IE_NAME = 'bangumi.bilibili.com'
+    IE_DESC = 'BiliBili番剧'
+
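+    # Illustrative note (not part of the original patch): only bare
+    # /anime/<id> overview pages are handled here; episode URLs with a /play#
+    # fragment are left to BiliBiliIE (see suitable() below). The season's
+    # episode list comes from the seasoninfo JSONP API and each episode is
+    # re-dispatched to BiliBiliIE as a url_transparent entry.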
_TESTS = [{ + 'url': 'http://bangumi.bilibili.com/anime/1869', + 'info_dict': { + 'id': '1869', + 'title': '混沌武士', + 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + }, + 'playlist_count': 26, + }, { + 'url': 'http://bangumi.bilibili.com/anime/1869', + 'info_dict': { + 'id': '1869', + 'title': '混沌武士', + 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', + }, + 'playlist': [{ + 'md5': '91da8621454dd58316851c27c68b0c13', + 'info_dict': { + 'id': '40062', + 'ext': 'mp4', + 'title': '混沌武士', + 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...', + 'timestamp': 1414538739, + 'upload_date': '20141028', + 'episode': '疾风怒涛 Tempestuous Temperaments', + 'episode_number': 1, + }, + }], + 'params': { + 'playlist_items': '1', + }, + }] + + @classmethod + def suitable(cls, url): + return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url) + + def _real_extract(self, url): + bangumi_id = self._match_id(url) + + # Sometimes this API returns a JSONP response + season_info = self._download_json( + 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id, + bangumi_id, transform_source=strip_jsonp)['result'] + + entries = [{ + '_type': 'url_transparent', + 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}), + 'ie_key': BiliBiliIE.ie_key(), + 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '), + 'episode': episode.get('index_title'), + 'episode_number': int_or_none(episode.get('index')), + } for episode in season_info['episodes']] + + entries = sorted(entries, key=lambda entry: entry.get('episode_number')) + + return self.playlist_result( + entries, bangumi_id, + season_info.get('bangumi_title'), season_info.get('evaluate')) diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dl/extractor/biobiochiletv.py new file mode 100644 index 000000000..b92031c8a --- /dev/null +++ b/youtube_dl/extractor/biobiochiletv.py @@ -0,0 +1,81 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + remove_end, +) +from .rudo import RudoIE + + +class BioBioChileTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:tv|www)\.biobiochile\.cl/(?:notas|noticias)/(?:[^/]+/)+(?P<id>[^/]+)\.shtml' + + _TESTS = [{ + 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/sobre-camaras-y-camarillas-parlamentarias.shtml', + 'md5': '26f51f03cf580265defefb4518faec09', + 'info_dict': { + 'id': 'sobre-camaras-y-camarillas-parlamentarias', + 'ext': 'mp4', + 'title': 'Sobre Cámaras y camarillas parlamentarias', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Fernando Atria', + }, + 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', + }, { + # different uploader layout + 'url': 'http://tv.biobiochile.cl/notas/2016/03/18/natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades.shtml', + 'md5': 'edc2e6b58974c46d5b047dea3c539ff3', + 'info_dict': { + 'id': 'natalia-valdebenito-repasa-a-diputado-hasbun-paso-a-la-categoria-de-hablar-brutalidades', + 'ext': 'mp4', + 'title': 'Natalia Valdebenito repasa a diputado Hasbún: Pasó a la categoría de hablar brutalidades', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Piangella Obrador', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'URL expired and redirected to http://www.biobiochile.cl/portada/bbtv/index.html', + }, { + 'url': 
'http://www.biobiochile.cl/noticias/bbtv/comentarios-bio-bio/2016/07/08/edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos.shtml', + 'info_dict': { + 'id': 'edecanes-del-congreso-figuras-decorativas-que-le-cuestan-muy-caro-a-los-chilenos', + 'ext': 'mp4', + 'uploader': '(none)', + 'upload_date': '20160708', + 'title': 'Edecanes del Congreso: Figuras decorativas que le cuestan muy caro a los chilenos', + }, + }, { + 'url': 'http://tv.biobiochile.cl/notas/2015/10/22/ninos-transexuales-de-quien-es-la-decision.shtml', + 'only_matching': True, + }, { + 'url': 'http://tv.biobiochile.cl/notas/2015/10/21/exclusivo-hector-pinto-formador-de-chupete-revela-version-del-ex-delantero-albo.shtml', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + rudo_url = RudoIE._extract_url(webpage) + if not rudo_url: + raise ExtractorError('No videos found') + + title = remove_end(self._og_search_title(webpage), ' - BioBioChile TV') + + thumbnail = self._og_search_thumbnail(webpage) + uploader = self._html_search_regex( + r'<a[^>]+href=["\']https?://(?:busca|www)\.biobiochile\.cl/(?:lista/)?(?:author|autor)[^>]+>(.+?)</a>', + webpage, 'uploader', fatal=False) + + return { + '_type': 'url_transparent', + 'url': rudo_url, + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'uploader': uploader, + } diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py new file mode 100644 index 000000000..af21e3ee5 --- /dev/null +++ b/youtube_dl/extractor/biqle.py @@ -0,0 +1,97 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .vk import VKIE +from ..utils import ( + HEADRequest, + int_or_none, +) + + +class BIQLEIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)' + _TESTS = [{ + # Youtube embed + 'url': 'https://biqle.ru/watch/-115995369_456239081', + 'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06', + 'info_dict': { + 'id': '8v4f-avW-VI', + 'ext': 'mp4', + 'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer", + 'description': 'Passe-Partout', + 'uploader_id': 'mrsimpsonstef3', + 'uploader': 'Phanolito', + 'upload_date': '20120822', + }, + }, { + 'url': 'http://biqle.org/watch/-44781847_168547604', + 'md5': '7f24e72af1db0edf7c1aaba513174f97', + 'info_dict': { + 'id': '-44781847_168547604', + 'ext': 'mp4', + 'title': 'Ребенок в шоке от автоматической мойки', + 'timestamp': 1396633454, + 'uploader': 'Dmitry Kotov', + 'upload_date': '20140404', + 'uploader_id': '47850140', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + embed_url = self._proto_relative_url(self._search_regex( + r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>', + webpage, 'embed url')) + if VKIE.suitable(embed_url): + return self.url_result(embed_url, VKIE.ie_key(), video_id) + + self._request_webpage( + HEADRequest(embed_url), video_id, headers={'Referer': url}) + video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A') + item = self._download_json( + 'https://api.vk.com/method/video.get', video_id, + headers={'User-Agent': 'okhttp/3.4.1'}, query={ + 'access_token': access_token, + 'sig': sig, + 'v': 5.44, + 'videos': video_id, + })['response']['items'][0] + title = item['title'] + + formats = [] + for f_id, f_url in item.get('files', {}).items(): + if 
f_id == 'external': + return self.url_result(f_url) + ext, height = f_id.split('_') + formats.append({ + 'format_id': height + 'p', + 'url': f_url, + 'height': int_or_none(height), + 'ext': ext, + }) + self._sort_formats(formats) + + thumbnails = [] + for k, v in item.items(): + if k.startswith('photo_') and v: + width = k.replace('photo_', '') + thumbnails.append({ + 'id': width, + 'url': v, + 'width': int_or_none(width), + }) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'comment_count': int_or_none(item.get('comments')), + 'description': item.get('description'), + 'duration': int_or_none(item.get('duration')), + 'thumbnails': thumbnails, + 'timestamp': int_or_none(item.get('date')), + 'uploader': item.get('owner_id'), + 'view_count': int_or_none(item.get('views')), + } diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py new file mode 100644 index 000000000..430663fbf --- /dev/null +++ b/youtube_dl/extractor/bitchute.py @@ -0,0 +1,135 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import re + +from .common import InfoExtractor +from ..utils import ( + orderedSet, + urlencode_postdata, +) + + +class BitChuteIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.bitchute.com/video/szoMrox2JEI/', + 'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb', + 'info_dict': { + 'id': 'szoMrox2JEI', + 'ext': 'mp4', + 'title': 'Fuck bitches get money', + 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Victoria X Rave', + }, + }, { + 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/', + 'only_matching': True, + }, { + 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={ + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36', + }) + + title = self._html_search_regex( + (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'), + webpage, 'title', default=None) or self._html_search_meta( + 'description', webpage, 'title', + default=None) or self._og_search_description(webpage) + + format_urls = [] + for mobj in re.finditer( + r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage): + format_urls.append(mobj.group('url')) + format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage)) + + formats = [ + {'url': format_url} + for format_url in orderedSet(format_urls)] + + if not formats: + formats = self._parse_html5_media_entries( + url, webpage, video_id)[0]['formats'] + + self._check_formats(formats, video_id) + self._sort_formats(formats) + + description = self._html_search_regex( + r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>', + webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'twitter:image:src', webpage, 'thumbnail') + uploader = self._html_search_regex( + (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>', + r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'), + webpage, 'uploader', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 
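+            # Note (not part of the original patch): 'uploader' is scraped
+            # from the channel-banner (or legacy video-author) markup matched
+            # above rather than from any structured API field.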
'uploader': uploader, + 'formats': formats, + } + + +class BitChuteChannelIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://www.bitchute.com/channel/victoriaxrave/', + 'playlist_mincount': 185, + 'info_dict': { + 'id': 'victoriaxrave', + }, + } + + _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7' + + def _entries(self, channel_id): + channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id + offset = 0 + for page_num in itertools.count(1): + data = self._download_json( + '%sextend/' % channel_url, channel_id, + 'Downloading channel page %d' % page_num, + data=urlencode_postdata({ + 'csrfmiddlewaretoken': self._TOKEN, + 'name': '', + 'offset': offset, + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', + 'Referer': channel_url, + 'X-Requested-With': 'XMLHttpRequest', + 'Cookie': 'csrftoken=%s' % self._TOKEN, + }) + if data.get('success') is False: + break + html = data.get('html') + if not html: + break + video_ids = re.findall( + r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)', + html) + if not video_ids: + break + offset += len(video_ids) + for video_id in video_ids: + yield self.url_result( + 'https://www.bitchute.com/video/%s' % video_id, + ie=BitChuteIE.ie_key(), video_id=video_id) + + def _real_extract(self, url): + channel_id = self._match_id(url) + return self.playlist_result( + self._entries(channel_id), playlist_id=channel_id) diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dl/extractor/bleacherreport.py new file mode 100644 index 000000000..e829974ff --- /dev/null +++ b/youtube_dl/extractor/bleacherreport.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .amp import AMPIE +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, +) + + +class BleacherReportIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/articles/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://bleacherreport.com/articles/2496438-fsu-stat-projections-is-jalen-ramsey-best-defensive-player-in-college-football', + 'md5': 'a3ffc3dc73afdbc2010f02d98f990f20', + 'info_dict': { + 'id': '2496438', + 'ext': 'mp4', + 'title': 'FSU Stat Projections: Is Jalen Ramsey Best Defensive Player in College Football?', + 'uploader_id': 3992341, + 'description': 'CFB, ACC, Florida State', + 'timestamp': 1434380212, + 'upload_date': '20150615', + 'uploader': 'Team Stream Now ', + }, + 'add_ie': ['Ooyala'], + }, { + 'url': 'http://bleacherreport.com/articles/2586817-aussie-golfers-get-fright-of-their-lives-after-being-chased-by-angry-kangaroo', + 'md5': '6a5cd403418c7b01719248ca97fb0692', + 'info_dict': { + 'id': '2586817', + 'ext': 'webm', + 'title': 'Aussie Golfers Get Fright of Their Lives After Being Chased by Angry Kangaroo', + 'timestamp': 1446839961, + 'uploader': 'Sean Fay', + 'description': 'md5:b1601e2314c4d8eec23b6eafe086a757', + 'uploader_id': 6466954, + 'upload_date': '20151011', + }, + 'add_ie': ['Youtube'], + }] + + def _real_extract(self, url): + article_id = self._match_id(url) + + article_data = self._download_json('http://api.bleacherreport.com/api/v1/articles/%s' % article_id, article_id)['article'] + + thumbnails = [] + primary_photo = article_data.get('primaryPhoto') + if primary_photo: + thumbnails = [{ + 'url': primary_photo['url'], + 'width': primary_photo.get('width'), + 'height': 
primary_photo.get('height'), + }] + + info = { + '_type': 'url_transparent', + 'id': article_id, + 'title': article_data['title'], + 'uploader': article_data.get('author', {}).get('name'), + 'uploader_id': article_data.get('authorId'), + 'timestamp': parse_iso8601(article_data.get('createdAt')), + 'thumbnails': thumbnails, + 'comment_count': int_or_none(article_data.get('commentsCount')), + 'view_count': int_or_none(article_data.get('hitCount')), + } + + video = article_data.get('video') + if video: + video_type = video['type'] + if video_type == 'cms.bleacherreport.com': + info['url'] = 'http://bleacherreport.com/video_embed?id=%s' % video['id'] + elif video_type == 'ooyala.com': + info['url'] = 'ooyala:%s' % video['id'] + elif video_type == 'youtube.com': + info['url'] = video['id'] + elif video_type == 'vine.co': + info['url'] = 'https://vine.co/v/%s' % video['id'] + else: + info['url'] = video_type + video['id'] + return info + else: + raise ExtractorError('no video in the article', expected=True) + + +class BleacherReportCMSIE(AMPIE): + _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36})' + _TESTS = [{ + 'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1', + 'md5': '2e4b0a997f9228ffa31fada5c53d1ed1', + 'info_dict': { + 'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1', + 'ext': 'flv', + 'title': 'Cena vs. Rollins Would Expose the Heavyweight Division', + 'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._extract_feed_info('http://cms.bleacherreport.com/media/items/%s/akamai.json' % video_id) + info['id'] = video_id + return info diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py new file mode 100644 index 000000000..db5e12b21 --- /dev/null +++ b/youtube_dl/extractor/blinkx.py @@ -0,0 +1,86 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + remove_start, + int_or_none, +) + + +class BlinkxIE(InfoExtractor): + _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' + IE_NAME = 'blinkx' + + _TEST = { + 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', + 'md5': '337cf7a344663ec79bf93a526a2e06c7', + 'info_dict': { + 'id': 'Da0Gw3xc', + 'ext': 'mp4', + 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', + 'uploader': 'IGN News', + 'upload_date': '20150217', + 'timestamp': 1424215740, + 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', + 'duration': 47.743333, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + display_id = video_id[:8] + + api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + + 'video=%s' % video_id) + data_json = self._download_webpage(api_url, display_id) + data = json.loads(data_json)['api']['results'][0] + duration = None + thumbnails = [] + formats = [] + for m in data['media']: + if m['type'] == 'jpg': + thumbnails.append({ + 'url': m['link'], + 'width': int(m['w']), + 'height': int(m['h']), + }) + elif m['type'] == 'original': + duration = float(m['d']) + elif m['type'] == 'youtube': + yt_id = m['link'] + self.to_screen('Youtube video detected: %s' % yt_id) + return self.url_result(yt_id, 'Youtube', video_id=yt_id) + elif m['type'] in 
('flv', 'mp4'): + vcodec = remove_start(m['vcodec'], 'ff') + acodec = remove_start(m['acodec'], 'ff') + vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) + abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) + tbr = vbr + abr if vbr and abr else None + format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) + formats.append({ + 'format_id': format_id, + 'url': m['link'], + 'vcodec': vcodec, + 'acodec': acodec, + 'abr': abr, + 'vbr': vbr, + 'tbr': tbr, + 'width': int_or_none(m.get('w')), + 'height': int_or_none(m.get('h')), + }) + + self._sort_formats(formats) + + return { + 'id': display_id, + 'fullid': video_id, + 'title': data['title'], + 'formats': formats, + 'uploader': data['channel_name'], + 'timestamp': data['pubdate_epoch'], + 'description': data.get('description'), + 'thumbnails': thumbnails, + 'duration': duration, + } diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dl/extractor/bloomberg.py new file mode 100644 index 000000000..2fbfad1ba --- /dev/null +++ b/youtube_dl/extractor/bloomberg.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class BloombergIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bloomberg\.com/(?:[^/]+/)*(?P<id>[^/?#]+)' + + _TESTS = [{ + 'url': 'http://www.bloomberg.com/news/videos/b/aaeae121-5949-481e-a1ce-4562db6f5df2', + # The md5 checksum changes + 'info_dict': { + 'id': 'qurhIVlJSB6hzkVi229d8g', + 'ext': 'flv', + 'title': 'Shah\'s Presentation on Foreign-Exchange Strategies', + 'description': 'md5:a8ba0302912d03d246979735c17d2761', + }, + 'params': { + 'format': 'best[format_id^=hds]', + }, + }, { + # video ID in BPlayer(...) + 'url': 'http://www.bloomberg.com/features/2016-hello-world-new-zealand/', + 'info_dict': { + 'id': '938c7e72-3f25-4ddb-8b85-a9be731baa74', + 'ext': 'flv', + 'title': 'Meet the Real-Life Tech Wizards of Middle Earth', + 'description': 'Hello World, Episode 1: New Zealand’s freaky AI babies, robot exoskeletons, and a virtual you.', + }, + 'params': { + 'format': 'best[format_id^=hds]', + }, + }, { + # data-bmmrid= + 'url': 'https://www.bloomberg.com/politics/articles/2017-02-08/le-pen-aide-briefed-french-central-banker-on-plan-to-print-money', + 'only_matching': True, + }, { + 'url': 'http://www.bloomberg.com/news/articles/2015-11-12/five-strange-things-that-have-been-happening-in-financial-markets', + 'only_matching': True, + }, { + 'url': 'http://www.bloomberg.com/politics/videos/2015-11-25/karl-rove-on-jeb-bush-s-struggles-stopping-trump', + 'only_matching': True, + }] + + def _real_extract(self, url): + name = self._match_id(url) + webpage = self._download_webpage(url, name) + video_id = self._search_regex( + (r'["\']bmmrId["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', + r'videoId\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', + r'data-bmmrid=(["\'])(?P<id>(?:(?!\1).)+)\1'), + webpage, 'id', group='id', default=None) + if not video_id: + bplayer_data = self._parse_json(self._search_regex( + r'BPlayer\(null,\s*({[^;]+})\);', webpage, 'id'), name) + video_id = bplayer_data['id'] + title = re.sub(': Video$', '', self._og_search_title(webpage)) + + embed_info = self._download_json( + 'http://www.bloomberg.com/api/embed?id=%s' % video_id, video_id) + formats = [] + for stream in embed_info['streams']: + stream_url = stream.get('url') + if not stream_url: + continue + if stream['muxing_format'] == 'TS': + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + else: + 
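+                # Note (not part of the original patch): non-TS muxings are
+                # assumed to be Adobe HDS manifests and get expanded into f4m
+                # fragment formats here.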
formats.extend(self._extract_f4m_formats( + stream_url, video_id, f4m_id='hds', fatal=False)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + } diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dl/extractor/bokecc.py new file mode 100644 index 000000000..86a7f4d7d --- /dev/null +++ b/youtube_dl/extractor/bokecc.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_parse_qs +from ..utils import ExtractorError + + +class BokeCCBaseIE(InfoExtractor): + def _extract_bokecc_formats(self, webpage, video_id, format_id=None): + player_params_str = self._html_search_regex( + r'<(?:script|embed)[^>]+src="http://p\.bokecc\.com/player\?([^"]+)', + webpage, 'player params') + + player_params = compat_parse_qs(player_params_str) + + info_xml = self._download_xml( + 'http://p.bokecc.com/servlet/playinfo?uid=%s&vid=%s&m=1' % ( + player_params['siteid'][0], player_params['vid'][0]), video_id) + + formats = [{ + 'format_id': format_id, + 'url': quality.find('./copy').attrib['playurl'], + 'preference': int(quality.attrib['value']), + } for quality in info_xml.findall('./video/quality')] + + self._sort_formats(formats) + + return formats + + +class BokeCCIE(BokeCCBaseIE): + _IE_DESC = 'CC视频' + _VALID_URL = r'https?://union\.bokecc\.com/playvideo\.bo\?(?P<query>.*)' + + _TESTS = [{ + 'url': 'http://union.bokecc.com/playvideo.bo?vid=E44D40C15E65EA30&uid=CD0C5D3C8614B28B', + 'info_dict': { + 'id': 'CD0C5D3C8614B28B_E44D40C15E65EA30', + 'ext': 'flv', + 'title': 'BokeCC Video', + }, + }] + + def _real_extract(self, url): + qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query')) + if not qs.get('vid') or not qs.get('uid'): + raise ExtractorError('Invalid URL', expected=True) + + video_id = '%s_%s' % (qs['uid'][0], qs['vid'][0]) + + webpage = self._download_webpage(url, video_id) + + return { + 'id': video_id, + 'title': 'BokeCC Video', # no title provided in the webpage + 'formats': self._extract_bokecc_formats(webpage, video_id), + } diff --git a/youtube_dl/extractor/bostonglobe.py b/youtube_dl/extractor/bostonglobe.py new file mode 100644 index 000000000..57882fbee --- /dev/null +++ b/youtube_dl/extractor/bostonglobe.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + extract_attributes, +) + + +class BostonGlobeIE(InfoExtractor): + _VALID_URL = r'(?i)https?://(?:www\.)?bostonglobe\.com/.*/(?P<id>[^/]+)/\w+(?:\.html)?' + _TESTS = [ + { + 'url': 'http://www.bostonglobe.com/metro/2017/02/11/tree-finally-succumbs-disease-leaving-hole-neighborhood/h1b4lviqzMTIn9sVy8F3gP/story.html', + 'md5': '0a62181079c85c2d2b618c9a738aedaf', + 'info_dict': { + 'title': 'A tree finally succumbs to disease, leaving a hole in a neighborhood', + 'id': '5320421710001', + 'ext': 'mp4', + 'description': 'It arrived as a sapling when the Back Bay was in its infancy, a spindly American elm tamped down into a square of dirt cut into the brick sidewalk of 1880s Marlborough Street, no higher than the first bay window of the new brownstone behind it.', + 'timestamp': 1486877593, + 'upload_date': '20170212', + 'uploader_id': '245991542', + }, + }, + { + # Embedded youtube video; we hand it off to the Generic extractor. 
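+            # (So the id and uploader fields below describe the embedded
+            # YouTube video, not a Brightcove one.)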
+ 'url': 'https://www.bostonglobe.com/lifestyle/names/2017/02/17/does-ben-affleck-play-matt-damon-favorite-version-batman/ruqkc9VxKBYmh5txn1XhSI/story.html', + 'md5': '582b40327089d5c0c949b3c54b13c24b', + 'info_dict': { + 'title': "Who Is Matt Damon's Favorite Batman?", + 'id': 'ZW1QCnlA6Qc', + 'ext': 'mp4', + 'upload_date': '20170217', + 'description': 'md5:3b3dccb9375867e0b4d527ed87d307cb', + 'uploader': 'The Late Late Show with James Corden', + 'uploader_id': 'TheLateLateShow', + }, + 'expected_warnings': ['404'], + }, + ] + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + page_title = self._og_search_title(webpage, default=None) + + # <video data-brightcove-video-id="5320421710001" data-account="245991542" data-player="SJWAiyYWg" data-embed="default" class="video-js" controls itemscope itemtype="http://schema.org/VideoObject"> + entries = [] + for video in re.findall(r'(?i)(<video[^>]+>)', webpage): + attrs = extract_attributes(video) + + video_id = attrs.get('data-brightcove-video-id') + account_id = attrs.get('data-account') + player_id = attrs.get('data-player') + embed = attrs.get('data-embed') + + if video_id and account_id and player_id and embed: + entries.append( + 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' + % (account_id, player_id, embed, video_id)) + + if len(entries) == 0: + return self.url_result(url, 'Generic') + elif len(entries) == 1: + return self.url_result(entries[0], 'BrightcoveNew') + else: + return self.playlist_from_matches(entries, page_id, page_title, ie='BrightcoveNew') diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py new file mode 100644 index 000000000..07833532e --- /dev/null +++ b/youtube_dl/extractor/bpb.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + determine_ext, +) + + +class BpbIE(InfoExtractor): + IE_DESC = 'Bundeszentrale für politische Bildung' + _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/' + + _TEST = { + 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', + # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2 + 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f', + 'info_dict': { + 'id': '297', + 'ext': 'mp4', + 'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR', + 'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.' 
+ } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex( + r'<h2 class="white">(.*?)</h2>', webpage, 'title') + video_info_dicts = re.findall( + r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage) + + formats = [] + for video_info in video_info_dicts: + video_info = self._parse_json( + video_info, video_id, transform_source=js_to_json, fatal=False) + if not video_info: + continue + video_url = video_info.get('src') + if not video_url: + continue + quality = 'high' if '_high' in video_url else 'low' + formats.append({ + 'url': video_url, + 'preference': 10 if quality == 'high' else 0, + 'format_note': quality, + 'format_id': '%s-%s' % (quality, determine_ext(video_url)), + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'description': self._og_search_description(webpage), + } diff --git a/youtube_dl/extractor/br.py b/youtube_dl/extractor/br.py new file mode 100644 index 000000000..9bde7f2d8 --- /dev/null +++ b/youtube_dl/extractor/br.py @@ -0,0 +1,311 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + parse_duration, + parse_iso8601, + xpath_element, + xpath_text, +) + + +class BRIE(InfoExtractor): + IE_DESC = 'Bayerischer Rundfunk' + _VALID_URL = r'(?P<base_url>https?://(?:www\.)?br(?:-klassik)?\.de)/(?:[a-z0-9\-_]+/)+(?P<id>[a-z0-9\-_]+)\.html' + + _TESTS = [ + { + 'url': 'http://www.br.de/mediathek/video/sendungen/abendschau/betriebliche-altersvorsorge-104.html', + 'md5': '83a0477cf0b8451027eb566d88b51106', + 'info_dict': { + 'id': '48f656ef-287e-486f-be86-459122db22cc', + 'ext': 'mp4', + 'title': 'Die böse Überraschung', + 'description': 'md5:ce9ac81b466ce775b8018f6801b48ac9', + 'duration': 180, + 'uploader': 'Reinhard Weber', + 'upload_date': '20150422', + }, + 'skip': '404 not found', + }, + { + 'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html', + 'md5': 'af3a3a4aa43ff0ce6a89504c67f427ef', + 'info_dict': { + 'id': 'a4b83e34-123d-4b81-9f4e-c0d3121a4e05', + 'ext': 'flv', + 'title': 'Manfred Schreiber ist tot', + 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97', + 'duration': 26, + }, + 'skip': '404 not found', + }, + { + 'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html', + 'md5': '8b5b27c0b090f3b35eac4ab3f7a73d3d', + 'info_dict': { + 'id': '74c603c9-26d3-48bb-b85b-079aeed66e0b', + 'ext': 'aac', + 'title': 'Kurzweilig und sehr bewegend', + 'description': 'md5:0351996e3283d64adeb38ede91fac54e', + 'duration': 296, + }, + 'skip': '404 not found', + }, + { + 'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', + 'md5': 'dbab0aef2e047060ea7a21fc1ce1078a', + 'info_dict': { + 'id': '6ba73750-d405-45d3-861d-1ce8c524e059', + 'ext': 'mp4', + 'title': 'Umweltbewusster Häuslebauer', + 'description': 'md5:d52dae9792d00226348c1dbb13c9bae2', + 'duration': 116, + } + }, + { + 'url': 'http://www.br.de/fernsehen/br-alpha/sendungen/kant-fuer-anfaenger/kritik-der-reinen-vernunft/kant-kritik-01-metaphysik100.html', + 'md5': '23bca295f1650d698f94fc570977dae3', + 'info_dict': { + 'id': 'd982c9ce-8648-4753-b358-98abb8aec43d', + 'ext': 'mp4', + 'title': 'Folge 1 - Metaphysik', + 'description': 'md5:bb659990e9e59905c3d41e369db1fbe3', + 'duration': 
893, + 'uploader': 'Eva Maria Steimle', + 'upload_date': '20170208', + } + }, + ] + + def _real_extract(self, url): + base_url, display_id = re.search(self._VALID_URL, url).groups() + page = self._download_webpage(url, display_id) + xml_url = self._search_regex( + r"return BRavFramework\.register\(BRavFramework\('avPlayer_(?:[a-f0-9-]{36})'\)\.setup\({dataURL:'(/(?:[a-z0-9\-]+/)+[a-z0-9/~_.-]+)'}\)\);", page, 'XMLURL') + xml = self._download_xml(base_url + xml_url, display_id) + + medias = [] + + for xml_media in xml.findall('video') + xml.findall('audio'): + media_id = xml_media.get('externalId') + media = { + 'id': media_id, + 'title': xpath_text(xml_media, 'title', 'title', True), + 'duration': parse_duration(xpath_text(xml_media, 'duration')), + 'formats': self._extract_formats(xpath_element( + xml_media, 'assets'), media_id), + 'thumbnails': self._extract_thumbnails(xpath_element( + xml_media, 'teaserImage/variants'), base_url), + 'description': xpath_text(xml_media, 'desc'), + 'webpage_url': xpath_text(xml_media, 'permalink'), + 'uploader': xpath_text(xml_media, 'author'), + } + broadcast_date = xpath_text(xml_media, 'broadcastDate') + if broadcast_date: + media['upload_date'] = ''.join(reversed(broadcast_date.split('.'))) + medias.append(media) + + if len(medias) > 1: + self._downloader.report_warning( + 'found multiple medias; please ' + 'report this with the video URL to http://yt-dl.org/bug') + if not medias: + raise ExtractorError('No media entries found') + return medias[0] + + def _extract_formats(self, assets, media_id): + formats = [] + for asset in assets.findall('asset'): + format_url = xpath_text(asset, ['downloadUrl', 'url']) + asset_type = asset.get('type') + if asset_type.startswith('HDS'): + formats.extend(self._extract_f4m_formats( + format_url + '?hdcore=3.2.0', media_id, f4m_id='hds', fatal=False)) + elif asset_type.startswith('HLS'): + formats.extend(self._extract_m3u8_formats( + format_url, media_id, 'mp4', 'm3u8_native', m3u8_id='hds', fatal=False)) + else: + format_info = { + 'ext': xpath_text(asset, 'mediaType'), + 'width': int_or_none(xpath_text(asset, 'frameWidth')), + 'height': int_or_none(xpath_text(asset, 'frameHeight')), + 'tbr': int_or_none(xpath_text(asset, 'bitrateVideo')), + 'abr': int_or_none(xpath_text(asset, 'bitrateAudio')), + 'vcodec': xpath_text(asset, 'codecVideo'), + 'acodec': xpath_text(asset, 'codecAudio'), + 'container': xpath_text(asset, 'mediaType'), + 'filesize': int_or_none(xpath_text(asset, 'size')), + } + format_url = self._proto_relative_url(format_url) + if format_url: + http_format_info = format_info.copy() + http_format_info.update({ + 'url': format_url, + 'format_id': 'http-%s' % asset_type, + }) + formats.append(http_format_info) + server_prefix = xpath_text(asset, 'serverPrefix') + if server_prefix: + rtmp_format_info = format_info.copy() + rtmp_format_info.update({ + 'url': server_prefix, + 'play_path': xpath_text(asset, 'fileName'), + 'format_id': 'rtmp-%s' % asset_type, + }) + formats.append(rtmp_format_info) + self._sort_formats(formats) + return formats + + def _extract_thumbnails(self, variants, base_url): + thumbnails = [{ + 'url': base_url + xpath_text(variant, 'url'), + 'width': int_or_none(xpath_text(variant, 'width')), + 'height': int_or_none(xpath_text(variant, 'height')), + } for variant in variants.findall('variant') if xpath_text(variant, 'url')] + thumbnails.sort(key=lambda x: x['width'] * x['height'], reverse=True) + return thumbnails + + +class BRMediathekIE(InfoExtractor): + IE_DESC = 'Bayerischer Rundfunk 
Mediathek' + _VALID_URL = r'https?://(?:www\.)?br\.de/mediathek/video/[^/?&#]*?-(?P<id>av:[0-9a-f]{24})' + + _TESTS = [{ + 'url': 'https://www.br.de/mediathek/video/gesundheit-die-sendung-vom-28112017-av:5a1e6a6e8fce6d001871cc8e', + 'md5': 'fdc3d485835966d1622587d08ba632ec', + 'info_dict': { + 'id': 'av:5a1e6a6e8fce6d001871cc8e', + 'ext': 'mp4', + 'title': 'Die Sendung vom 28.11.2017', + 'description': 'md5:6000cdca5912ab2277e5b7339f201ccc', + 'timestamp': 1511942766, + 'upload_date': '20171129', + } + }] + + def _real_extract(self, url): + clip_id = self._match_id(url) + + clip = self._download_json( + 'https://proxy-base.master.mango.express/graphql', + clip_id, data=json.dumps({ + "query": """{ + viewer { + clip(id: "%s") { + title + description + duration + createdAt + ageRestriction + videoFiles { + edges { + node { + publicLocation + fileSize + videoProfile { + width + height + bitrate + encoding + } + } + } + } + captionFiles { + edges { + node { + publicLocation + } + } + } + teaserImages { + edges { + node { + imageFiles { + edges { + node { + publicLocation + width + height + } + } + } + } + } + } + } + } +}""" % clip_id}).encode(), headers={ + 'Content-Type': 'application/json', + })['data']['viewer']['clip'] + title = clip['title'] + + formats = [] + for edge in clip.get('videoFiles', {}).get('edges', []): + node = edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + ext = determine_ext(n_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + n_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + else: + video_profile = node.get('videoProfile', {}) + tbr = int_or_none(video_profile.get('bitrate')) + format_id = 'http' + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': n_url, + 'width': int_or_none(video_profile.get('width')), + 'height': int_or_none(video_profile.get('height')), + 'tbr': tbr, + 'filesize': int_or_none(node.get('fileSize')), + }) + self._sort_formats(formats) + + subtitles = {} + for edge in clip.get('captionFiles', {}).get('edges', []): + node = edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + subtitles.setdefault('de', []).append({ + 'url': n_url, + }) + + thumbnails = [] + for edge in clip.get('teaserImages', {}).get('edges', []): + for image_edge in edge.get('node', {}).get('imageFiles', {}).get('edges', []): + node = image_edge.get('node', {}) + n_url = node.get('publicLocation') + if not n_url: + continue + thumbnails.append({ + 'url': n_url, + 'width': int_or_none(node.get('width')), + 'height': int_or_none(node.get('height')), + }) + + return { + 'id': clip_id, + 'title': title, + 'description': clip.get('description'), + 'duration': int_or_none(clip.get('duration')), + 'timestamp': parse_iso8601(clip.get('createdAt')), + 'age_limit': int_or_none(clip.get('ageRestriction')), + 'formats': formats, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + } diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py new file mode 100644 index 000000000..b9715df00 --- /dev/null +++ b/youtube_dl/extractor/bravotv.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .adobepass import AdobePassIE +from ..utils import ( + smuggle_url, + update_url_query, + int_or_none, +) + + +class BravoTVIE(AdobePassIE): + _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 
'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is', + 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9', + 'info_dict': { + 'id': 'epL0pmK1kQlT', + 'ext': 'mp4', + 'title': 'The Top Chef Season 16 Winner Is...', + 'description': 'Find out who takes the title of Top Chef!', + 'uploader': 'NBCU-BRAV', + 'upload_date': '20190314', + 'timestamp': 1552591860, + } + }, { + 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + settings = self._parse_json(self._search_regex( + r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'), + display_id) + info = {} + query = { + 'mbr': 'true', + } + account_pid, release_pid = [None] * 2 + tve = settings.get('ls_tve') + if tve: + query['manifest'] = 'm3u' + mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage) + if mobj: + account_pid, tp_path = mobj.groups() + release_pid = tp_path.strip('/').split('/')[-1] + else: + account_pid = 'HNK2IC' + tp_path = release_pid = tve['release_pid'] + if tve.get('entitlement') == 'auth': + adobe_pass = settings.get('tve_adobe_auth', {}) + resource = self._get_mvpd_resource( + adobe_pass.get('adobePassResourceId', 'bravo'), + tve['title'], release_pid, tve.get('rating')) + query['auth'] = self._extract_mvpd_auth( + url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource) + else: + shared_playlist = settings['ls_playlist'] + account_pid = shared_playlist['account_pid'] + metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] + tp_path = release_pid = metadata.get('release_pid') + if not release_pid: + release_pid = metadata['guid'] + tp_path = 'media/guid/2140479951/' + release_pid + info.update({ + 'title': metadata['title'], + 'description': metadata.get('description'), + 'season_number': int_or_none(metadata.get('season_num')), + 'episode_number': int_or_none(metadata.get('episode_num')), + }) + query['switch'] = 'progressive' + info.update({ + '_type': 'url_transparent', + 'id': release_pid, + 'url': smuggle_url(update_url_query( + 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path), + query), {'force_smil_url': True}), + 'ie_key': 'ThePlatform', + }) + return info diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py new file mode 100644 index 000000000..68c7cf2bb --- /dev/null +++ b/youtube_dl/extractor/breakcom.py @@ -0,0 +1,91 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( + int_or_none, + url_or_none, +) + + +class BreakIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' + _TESTS = [{ + 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', + 'info_dict': { + 'id': '2468056', + 'ext': 'mp4', + 'title': 'When Girls Act Like D-Bags', + 'age_limit': 13, + }, + }, { + # youtube embed + 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work', + 'info_dict': { + 'id': 'RrrDLdeL2HQ', + 'ext': 'mp4', + 'title': 'Whale Watching Boat Crashing Into San Diego Dock', + 'description': 'md5:afc1b2772f0a8468be51dd80eb021069', + 'upload_date': '20160331', + 'uploader': 'Steve Holden', + 
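+            # Note (not part of the original patch): these fields come from
+            # the delegated YoutubeIE extraction of the embedded player, not
+            # from break.com itself.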
'uploader_id': 'sdholden07', + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://www.break.com/video/ugc/baby-flex-2773063', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id, video_id = re.match(self._VALID_URL, url).groups() + + webpage = self._download_webpage(url, display_id) + + youtube_url = YoutubeIE._extract_url(webpage) + if youtube_url: + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + + content = self._parse_json( + self._search_regex( + r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage, + 'content'), + display_id) + + formats = [] + for video in content: + video_url = url_or_none(video.get('url')) + if not video_url: + continue + bitrate = int_or_none(self._search_regex( + r'(\d+)_kbps', video_url, 'tbr', default=None)) + formats.append({ + 'url': video_url, + 'format_id': 'http-%d' % bitrate if bitrate else 'http', + 'tbr': bitrate, + }) + self._sort_formats(formats) + + title = self._search_regex( + (r'title["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + r'<h1[^>]*>(?P<value>[^<]+)'), webpage, 'title', group='value') + + def get(key, name): + return int_or_none(self._search_regex( + r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name, + default=None)) + + age_limit = get('ratings', 'age limit') + video_id = video_id or get('pid', 'video id') or display_id + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py new file mode 100644 index 000000000..58ec5c979 --- /dev/null +++ b/youtube_dl/extractor/brightcove.py @@ -0,0 +1,797 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import json +import re +import struct + +from .common import InfoExtractor +from .adobepass import AdobePassIE +from ..compat import ( + compat_etree_fromstring, + compat_parse_qs, + compat_str, + compat_urllib_parse_urlparse, + compat_urlparse, + compat_xml_parse_error, + compat_HTTPError, +) +from ..utils import ( + determine_ext, + ExtractorError, + extract_attributes, + find_xpath_attr, + fix_xml_ampersands, + float_or_none, + js_to_json, + int_or_none, + parse_iso8601, + unescapeHTML, + unsmuggle_url, + update_url_query, + clean_html, + mimetype2ext, +) + + +class BrightcoveLegacyIE(InfoExtractor): + IE_NAME = 'brightcove:legacy' + _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)' + _FEDERATED_URL = 'http://c.brightcove.com/services/viewer/htmlFederated' + + _TESTS = [ + { + # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', + 'md5': '5423e113865d26e40624dce2e4b45d95', + 'note': 'Test Brightcove downloads and detection in GenericIE', + 'info_dict': { + 'id': '2371591881001', + 'ext': 'mp4', + 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', + 'uploader': '8TV', + 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', + 'timestamp': 1368213670, + 'upload_date': '20130510', + 'uploader_id': '1589608506001', + } + }, + { + # From http://medianetwork.oracle.com/video/player/1785452137001 + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', + 'info_dict': { + 'id': 
'1785452137001', + 'ext': 'flv', + 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', + 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', + 'uploader': 'Oracle', + 'timestamp': 1344975024, + 'upload_date': '20120814', + 'uploader_id': '1460825906', + }, + }, + { + # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ + 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001', + 'info_dict': { + 'id': '2750934548001', + 'ext': 'mp4', + 'title': 'This Bracelet Acts as a Personal Thermostat', + 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', + 'uploader': 'Mashable', + 'timestamp': 1382041798, + 'upload_date': '20131017', + 'uploader_id': '1130468786001', + }, + }, + { + # test that the default referer works + # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/ + 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001', + 'info_dict': { + 'id': '2878862109001', + 'ext': 'mp4', + 'title': 'Lost in Motion II', + 'description': 'md5:363109c02998fee92ec02211bd8000df', + 'uploader': 'National Ballet of Canada', + }, + 'skip': 'Video gone', + }, + { + # test flv videos served by akamaihd.net + # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', + # The md5 checksum changes on each download + 'info_dict': { + 'id': '3750436379001', + 'ext': 'flv', + 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + 'uploader': 'RBTV Old (do not use)', + 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', + 'timestamp': 1409122195, + 'upload_date': '20140827', + 'uploader_id': '710858724001', + }, + 'skip': 'Video gone', + }, + { + # playlist with 'videoList' + # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players + 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', + 'info_dict': { + 'title': 'Sealife', + 'id': '3550319591001', + }, + 'playlist_mincount': 7, + }, + { + # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965) + 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg', + 'info_dict': { + 'id': '1522758701001', + 'title': 'Lesson 08', + }, + 'playlist_mincount': 10, + }, + { + # playerID inferred from bcpid + # from http://www.un.org/chinese/News/story.asp?NewsID=27724 + 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350', + 'only_matching': True, # Tested in GenericIE + } + ] + FLV_VCODECS = { + 1: 'SORENSON', + 2: 'ON2', + 3: 
'H264', + 4: 'VP8', + } + + @classmethod + def _build_brightcove_url(cls, object_str): + """ + Build a Brightcove URL from an XML string containing + <object class="BrightcoveExperience">{params}</object> + """ + + # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553 + object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>', + lambda m: m.group(1) + '/>', object_str) + # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608 + object_str = object_str.replace('<--', '<!--') + # remove namespace to simplify extraction + object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str) + object_str = fix_xml_ampersands(object_str) + + try: + object_doc = compat_etree_fromstring(object_str.encode('utf-8')) + except compat_xml_parse_error: + return + + fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') + if fv_el is not None: + flashvars = dict( + (k, v[0]) + for k, v in compat_parse_qs(fv_el.attrib['value']).items()) + else: + flashvars = {} + + data_url = object_doc.attrib.get('data', '') + data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query) + + def find_param(name): + if name in flashvars: + return flashvars[name] + node = find_xpath_attr(object_doc, './param', 'name', name) + if node is not None: + return node.attrib['value'] + return data_url_params.get(name) + + params = {} + + playerID = find_param('playerID') or find_param('playerId') + if playerID is None: + raise ExtractorError('Cannot find player ID') + params['playerID'] = playerID + + playerKey = find_param('playerKey') + # Not all pages define this value + if playerKey is not None: + params['playerKey'] = playerKey + # These fields hold the id of the video + videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') + if videoPlayer is not None: + if isinstance(videoPlayer, list): + videoPlayer = videoPlayer[0] + videoPlayer = videoPlayer.strip() + # UUID is also possible for videoPlayer (e.g. + # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd + # or http://www8.hp.com/cn/zh/home.html) + if not (re.match( + r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', + videoPlayer) or videoPlayer.startswith('ref:')): + return None + params['@videoPlayer'] = videoPlayer + linkBase = find_param('linkBaseURL') + if linkBase is not None: + params['linkBaseURL'] = linkBase + return cls._make_brightcove_url(params) + + @classmethod + def _build_brightcove_url_from_js(cls, object_js): + # The layout of JS is as follows: + # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { + # // build Brightcove <object /> XML + # } + m = re.search( + r'''(?x)customBC\.createVideo\( + .*? # skipping width and height + ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID + ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters + # in length; however, it's appended to itself + # in places, so truncate + ["\'](?P<videoID>\d+)["\'] # @videoPlayer + ''', object_js) + if m: + return cls._make_brightcove_url(m.groupdict()) + + @classmethod + def _make_brightcove_url(cls, params): + return update_url_query(cls._FEDERATED_URL, params) + + @classmethod + def _extract_brightcove_url(cls, webpage): + """Try to extract the Brightcove URL from the webpage; returns None + if it can't be found + """ + urls = cls._extract_brightcove_urls(webpage) + return urls[0] if urls else None + + @classmethod + def _extract_brightcove_urls(cls, webpage): + """Return a list of all Brightcove URLs from the webpage """ + + url_m = re.search( + r'''(?x) + <meta\s+ + (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+ + content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2 + ''', webpage) + if url_m: + url = unescapeHTML(url_m.group('url')) + # Some sites don't add it, and we can't download with this URL, for example: + # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ + if 'playerKey' in url or 'videoId' in url or 'idVideo' in url: + return [url] + + matches = re.findall( + r'''(?sx)<object + (?: + [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | + [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ + ).+?>\s*</object>''', + webpage) + if matches: + return list(filter(None, [cls._build_brightcove_url(m) for m in matches])) + + matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage) + if matches: + return list(filter(None, [ + cls._build_brightcove_url_from_js(custom_bc) + for custom_bc in matches])) + return [src for _, src in re.findall( + r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)] + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + # Change the 'videoId' and similar fields to '@videoPlayer' + url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url) + # Change bckey (used by bcove.me URLs) to playerKey + url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) + mobj = re.match(self._VALID_URL, url) + query_str = mobj.group('query') + query = compat_urlparse.parse_qs(query_str) + + videoPlayer = query.get('@videoPlayer') + if videoPlayer: + # We set the original URL as the default 'Referer' header + referer = smuggled_data.get('Referer', url) + if 'playerID' not in query: + mobj = re.search(r'/bcpid(\d+)', url) + if mobj is not None: + query['playerID'] = [mobj.group(1)] + return self._get_video_info( + videoPlayer[0], query, referer=referer) + elif 'playerKey' in query: + player_key = query['playerKey'] + return self._get_playlist_info(player_key[0]) + else: + raise ExtractorError( + 'Cannot find playerKey= variable.
Did you forget quotes in a shell invocation?', + expected=True) + + def _brightcove_new_url_result(self, publisher_id, video_id): + brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id) + return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id) + + def _get_video_info(self, video_id, query, referer=None): + headers = {} + linkBase = query.get('linkBaseURL') + if linkBase is not None: + referer = linkBase[0] + if referer is not None: + headers['Referer'] = referer + webpage = self._download_webpage(self._FEDERATED_URL, video_id, headers=headers, query=query) + + error_msg = self._html_search_regex( + r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage, + 'error message', default=None) + if error_msg is not None: + publisher_id = query.get('publisherId') + if publisher_id and publisher_id[0].isdigit(): + publisher_id = publisher_id[0] + if not publisher_id: + player_key = query.get('playerKey') + if player_key and ',' in player_key[0]: + player_key = player_key[0] + else: + player_id = query.get('playerID') + if player_id and player_id[0].isdigit(): + player_page = self._download_webpage( + 'http://link.brightcove.com/services/player/bcpid' + player_id[0], + video_id, headers=headers, fatal=False) + if player_page: + player_key = self._search_regex( + r'<param\s+name="playerKey"\s+value="([\w~,-]+)"', + player_page, 'player key', fatal=False) + if player_key: + enc_pub_id = player_key.split(',')[1].replace('~', '=') + publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] + if publisher_id: + return self._brightcove_new_url_result(publisher_id, video_id) + raise ExtractorError( + 'brightcove said: %s' % error_msg, expected=True) + + self.report_extraction(video_id) + info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json') + info = json.loads(info)['data'] + video_info = info['programmedContent']['videoPlayer']['mediaDTO'] + video_info['_youtubedl_adServerURL'] = info.get('adServerURL') + + return self._extract_video_info(video_info) + + def _get_playlist_info(self, player_key): + info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key + playlist_info = self._download_webpage( + info_url, player_key, 'Downloading playlist information') + + json_data = json.loads(playlist_info) + if 'videoList' in json_data: + playlist_info = json_data['videoList'] + playlist_dto = playlist_info['mediaCollectionDTO'] + elif 'playlistTabs' in json_data: + playlist_info = json_data['playlistTabs'] + playlist_dto = playlist_info['lineupListDTO']['playlistDTOs'][0] + else: + raise ExtractorError('Empty playlist') + + videos = [self._extract_video_info(video_info) for video_info in playlist_dto['videoDTOs']] + + return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], + playlist_title=playlist_dto['displayName']) + + def _extract_video_info(self, video_info): + video_id = compat_str(video_info['id']) + publisher_id = video_info.get('publisherId') + info = { + 'id': video_id, + 'title': video_info['displayName'].strip(), + 'description': video_info.get('shortDescription'), + 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), + 'uploader': video_info.get('publisherName'), + 'uploader_id': compat_str(publisher_id) if publisher_id else None, + 'duration': float_or_none(video_info.get('length'), 1000), + 'timestamp': int_or_none(video_info.get('creationDate'), 
1000), + } + + renditions = video_info.get('renditions', []) + video_info.get('IOSRenditions', []) + if renditions: + formats = [] + for rend in renditions: + url = rend['defaultURL'] + if not url: + continue + ext = None + if rend['remote']: + url_comp = compat_urllib_parse_urlparse(url) + if url_comp.path.endswith('.m3u8'): + formats.extend( + self._extract_m3u8_formats( + url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + continue + elif 'akamaihd.net' in url_comp.netloc: + # These renditions are served through + # akamaihd.net, but they don't use f4m manifests + url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' + ext = 'flv' + if ext is None: + ext = determine_ext(url) + tbr = int_or_none(rend.get('encodingRate'), 1000) + a_format = { + 'format_id': 'http%s' % ('-%s' % tbr if tbr else ''), + 'url': url, + 'ext': ext, + 'filesize': int_or_none(rend.get('size')) or None, + 'tbr': tbr, + } + if rend.get('audioOnly'): + a_format.update({ + 'vcodec': 'none', + }) + else: + a_format.update({ + 'height': int_or_none(rend.get('frameHeight')), + 'width': int_or_none(rend.get('frameWidth')), + 'vcodec': rend.get('videoCodec'), + }) + + # m3u8 manifests with remote == false are media playlists + # Not calling _extract_m3u8_formats here to save network traffic + if ext == 'm3u8': + a_format.update({ + 'format_id': 'hls%s' % ('-%s' % tbr if tbr else ''), + 'ext': 'mp4', + 'protocol': 'm3u8_native', + }) + + formats.append(a_format) + self._sort_formats(formats) + info['formats'] = formats + elif video_info.get('FLVFullLengthURL') is not None: + info.update({ + 'url': video_info['FLVFullLengthURL'], + 'vcodec': self.FLV_VCODECS.get(video_info.get('FLVFullCodec')), + 'filesize': int_or_none(video_info.get('FLVFullSize')), + }) + + if self._downloader.params.get('include_ads', False): + adServerURL = video_info.get('_youtubedl_adServerURL') + if adServerURL: + ad_info = { + '_type': 'url', + 'url': adServerURL, + } + if 'url' in info: + return { + '_type': 'playlist', + 'title': info['title'], + 'entries': [ad_info, info], + } + else: + return ad_info + + if not info.get('url') and not info.get('formats'): + uploader_id = info.get('uploader_id') + if uploader_id: + info.update(self._brightcove_new_url_result(uploader_id, video_id)) + else: + raise ExtractorError('Unable to extract video url for %s' % video_id) + return info + + +class BrightcoveNewIE(AdobePassIE): + IE_NAME = 'brightcove:new' + _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)' + _TESTS = [{ + 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', + 'md5': 'c8100925723840d4b0d243f7025703be', + 'info_dict': { + 'id': '4463358922001', + 'ext': 'mp4', + 'title': 'Meet the man behind Popcorn Time', + 'description': 'md5:eac376a4fe366edc70279bfb681aea16', + 'duration': 165.768, + 'timestamp': 1441391203, + 'upload_date': '20150904', + 'uploader_id': '929656772001', + 'formats': 'mincount:20', + }, + }, { + # with rtmp streams + 'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001', + 'info_dict': { + 'id': '4279049078001', + 'ext': 'mp4', + 'title': 'Titansgrave: Chapter 0', + 'description': 'Titansgrave: Chapter 0', + 'duration': 1242.058, + 'timestamp': 1433556729, + 'upload_date': '20150606',
'uploader_id': '4036320279001', + 'formats': 'mincount:39', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + # playlist stream + 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', + 'info_dict': { + 'id': '5718313430001', + 'title': 'No Audio Playlist', + }, + 'playlist_count': 7, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', + 'only_matching': True, + }, { + # ref: prefixed video id + 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', + 'only_matching': True, + }, { + # non numeric ref: prefixed video id + 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356', + 'only_matching': True, + }, { + # unavailable video without message but with error_code + 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001', + 'only_matching': True, + }] + + @staticmethod + def _extract_url(ie, webpage): + urls = BrightcoveNewIE._extract_urls(ie, webpage) + return urls[0] if urls else None + + @staticmethod + def _extract_urls(ie, webpage): + # Reference: + # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe + # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag + # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript + # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html + # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player + + entries = [] + + # Look for iframe embeds [1] + for _, url in re.findall( + r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): + entries.append(url if url.startswith('http') else 'http:' + url) + + # Look for <video> tags [2] and embed_in_page embeds [3] + # [2] looks like: + for video, script_tag, account_id, player_id, embed in re.findall( + r'''(?isx) + (<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>) + (?:.*? + (<script[^>]+ + src=["\'](?:https?:)?//players\.brightcove\.net/ + (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js + ) + )? + ''', webpage): + attrs = extract_attributes(video) + + # According to examples from [4] it's unclear whether video id + # may be optional and what to do when it is + video_id = attrs.get('data-video-id') + if not video_id: + continue + + account_id = account_id or attrs.get('data-account') + if not account_id: + continue + + player_id = player_id or attrs.get('data-player') or 'default' + embed = embed or attrs.get('data-embed') or 'default' + + bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % ( + account_id, player_id, embed, video_id) + + # Some brightcove videos may be embedded with video tag only and + # without script tag or any mentioning of brightcove at all. Such + # embeds are considered ambiguous since they are matched based only + # on data-video-id and data-account attributes and in the wild may + # not be brightcove embeds at all. Let's check reconstructed + # brightcove URLs in case of such embeds and only process valid + # ones. By this we ensure there is indeed a brightcove embed. 
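+                # For example (hypothetical markup, with ids borrowed from the
+                # Popcorn Time test above), a bare
+                #   <video data-video-id="4463358922001" data-account="929656772001">
+                # with no accompanying script tag reconstructs to
+                #   http://players.brightcove.net/929656772001/default_default/index.html?videoId=4463358922001
+                # and is only kept if the validity check below succeeds.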
+ if not script_tag and not ie._is_valid_url( + bc_url, video_id, 'possible brightcove video'): + continue + + entries.append(bc_url) + + return entries + + def _parse_brightcove_metadata(self, json_data, video_id, headers={}): + title = json_data['name'].strip() + + formats = [] + for source in json_data.get('sources', []): + container = source.get('container') + ext = mimetype2ext(source.get('type')) + src = source.get('src') + # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object + if ext == 'ism' or container == 'WVM' or source.get('key_systems'): + continue + elif ext == 'm3u8' or container == 'M2TS': + if not src: + continue + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + if not src: + continue + formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False)) + else: + streaming_src = source.get('streaming_src') + stream_name, app_name = source.get('stream_name'), source.get('app_name') + if not src and not streaming_src and (not stream_name or not app_name): + continue + tbr = float_or_none(source.get('avg_bitrate'), 1000) + height = int_or_none(source.get('height')) + width = int_or_none(source.get('width')) + f = { + 'tbr': tbr, + 'filesize': int_or_none(source.get('size')), + 'container': container, + 'ext': ext or container.lower(), + } + if width == 0 and height == 0: + f.update({ + 'vcodec': 'none', + }) + else: + f.update({ + 'width': width, + 'height': height, + 'vcodec': source.get('codec'), + }) + + def build_format_id(kind): + format_id = kind + if tbr: + format_id += '-%dk' % int(tbr) + if height: + format_id += '-%dp' % height + return format_id + + if src or streaming_src: + f.update({ + 'url': src or streaming_src, + 'format_id': build_format_id('http' if src else 'http-streaming'), + 'source_preference': 0 if src else -1, + }) + else: + f.update({ + 'url': app_name, + 'play_path': stream_name, + 'format_id': build_format_id('rtmp'), + }) + formats.append(f) + if not formats: + # for sonyliv.com DRM protected videos + s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl') + if s3_source_url: + formats.append({ + 'url': s3_source_url, + 'format_id': 'source', + }) + + errors = json_data.get('errors') + if not formats and errors: + error = errors[0] + raise ExtractorError( + error.get('message') or error.get('error_subcode') or error['error_code'], expected=True) + + self._sort_formats(formats) + + for f in formats: + f.setdefault('http_headers', {}).update(headers) + + subtitles = {} + for text_track in json_data.get('text_tracks', []): + if text_track.get('src'): + subtitles.setdefault(text_track.get('srclang'), []).append({ + 'url': text_track['src'], + }) + + is_live = False + duration = float_or_none(json_data.get('duration'), 1000) + if duration is not None and duration <= 0: + is_live = True + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'description': clean_html(json_data.get('description')), + 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), + 'duration': duration, + 'timestamp': parse_iso8601(json_data.get('published_at')), + 'uploader_id': json_data.get('account_id'), + 'formats': formats, + 'subtitles': subtitles, + 'tags': json_data.get('tags', []), + 'is_live': is_live, + } + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass({ + 'countries': smuggled_data.get('geo_countries'), + 
'ip_blocks': smuggled_data.get('geo_ip_blocks'), + }) + + account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() + + webpage = self._download_webpage( + 'http://players.brightcove.net/%s/%s_%s/index.min.js' + % (account_id, player_id, embed), video_id) + + policy_key = None + + catalog = self._search_regex( + r'catalog\(({.+?})\);', webpage, 'catalog', default=None) + if catalog: + catalog = self._parse_json( + js_to_json(catalog), video_id, fatal=False) + if catalog: + policy_key = catalog.get('policyKey') + + if not policy_key: + policy_key = self._search_regex( + r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', + webpage, 'policy key', group='pk') + + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) + headers = { + 'Accept': 'application/json;pk=%s' % policy_key, + } + referrer = smuggled_data.get('referrer') + if referrer: + headers.update({ + 'Referer': referrer, + 'Origin': re.search(r'https?://[^/]+', referrer).group(0), + }) + try: + json_data = self._download_json(api_url, video_id, headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + raise ExtractorError(message, expected=True) + raise + + errors = json_data.get('errors') + if errors and errors[0].get('error_subcode') == 'TVE_AUTH': + custom_fields = json_data['custom_fields'] + tve_token = self._extract_mvpd_auth( + smuggled_data['source_url'], video_id, + custom_fields['bcadobepassrequestorid'], + custom_fields['bcadobepassresourceid']) + json_data = self._download_json( + api_url, video_id, headers={ + 'Accept': 'application/json;pk=%s' % policy_key + }, query={ + 'tveToken': tve_token, + }) + + if content_type == 'playlist': + return self.playlist_result( + [self._parse_brightcove_metadata(vid, vid.get('id'), headers) + for vid in json_data.get('videos', []) if vid.get('id')], + json_data.get('id'), json_data.get('name'), + json_data.get('description')) + + return self._parse_brightcove_metadata( + json_data, video_id, headers=headers) diff --git a/youtube_dl/extractor/businessinsider.py b/youtube_dl/extractor/businessinsider.py new file mode 100644 index 000000000..dfcf9bc6b --- /dev/null +++ b/youtube_dl/extractor/businessinsider.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from .jwplatform import JWPlatformIE + + +class BusinessInsiderIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6', + 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', + 'info_dict': { + 'id': 'hZRllCfw', + 'ext': 'mp4', + 'title': "Here's how much radiation you're exposed to in everyday life", + 'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd', + 'upload_date': '20170709', + 'timestamp': 1499606400, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/', + 'only_matching': True, + }, { + 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T', + 'only_matching': True, + }] + + def 
_real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + jwplatform_id = self._search_regex( + (r'data-media-id=["\']([a-zA-Z0-9]{8})', + r'id=["\']jwplayer_([a-zA-Z0-9]{8})', + r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'), + webpage, 'jwplatform id') + return self.url_result( + 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(), + video_id=video_id) diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dl/extractor/buzzfeed.py new file mode 100644 index 000000000..ec411091e --- /dev/null +++ b/youtube_dl/extractor/buzzfeed.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from .facebook import FacebookIE + + +class BuzzFeedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?buzzfeed\.com/[^?#]*?/(?P<id>[^?#]+)' + _TESTS = [{ + 'url': 'http://www.buzzfeed.com/abagg/this-angry-ram-destroys-a-punching-bag-like-a-boss?utm_term=4ldqpia', + 'info_dict': { + 'id': 'this-angry-ram-destroys-a-punching-bag-like-a-boss', + 'title': 'This Angry Ram Destroys A Punching Bag Like A Boss', + 'description': 'Rambro!', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'aVCR29aE_OQ', + 'ext': 'mp4', + 'title': 'Angry Ram destroys a punching bag..', + 'description': 'md5:c59533190ef23fd4458a5e8c8c872345', + 'upload_date': '20141024', + 'uploader_id': 'Buddhanz1', + 'uploader': 'Angry Ram', + } + }] + }, { + 'url': 'http://www.buzzfeed.com/sheridanwatson/look-at-this-cute-dog-omg?utm_term=4ldqpia', + 'params': { + 'skip_download': True, # Got enough YouTube download tests + }, + 'info_dict': { + 'id': 'look-at-this-cute-dog-omg', + 'description': 're:Munchkin the Teddy Bear is back ?!', + 'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'mVmBL8B-In0', + 'ext': 'mp4', + 'title': 're:Munchkin the Teddy Bear gets her exercise', + 'description': 'md5:28faab95cda6e361bcff06ec12fc21d8', + 'upload_date': '20141124', + 'uploader_id': 'CindysMunchkin', + 'uploader': 're:^Munchkin the', + }, + }] + }, { + 'url': 'http://www.buzzfeed.com/craigsilverman/the-most-adorable-crash-landing-ever#.eq7pX0BAmK', + 'info_dict': { + 'id': 'the-most-adorable-crash-landing-ever', + 'title': 'Watch This Baby Goose Make The Most Adorable Crash Landing', + 'description': 'This gosling knows how to stick a landing.', + }, + 'playlist': [{ + 'md5': '763ca415512f91ca62e4621086900a23', + 'info_dict': { + 'id': '971793786185728', + 'ext': 'mp4', + 'title': 'We set up crash pads so that the goslings on our roof would have a safe landi...', + 'uploader': 'Calgary Outdoor Centre-University of Calgary', + }, + }], + 'add_ie': ['Facebook'], + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + all_buckets = re.findall( + r'(?s)<div class="video-embed[^"]*"..*?rel:bf_bucket_data=\'([^\']+)\'', + webpage) + + entries = [] + for bd_json in all_buckets: + bd = json.loads(bd_json) + video = bd.get('video') or bd.get('progload_video') + if not video: + continue + entries.append(self.url_result(video['url'])) + + facebook_urls = FacebookIE._extract_urls(webpage) + entries.extend([ + self.url_result(facebook_url) + for facebook_url in facebook_urls]) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'entries': entries, + } diff --git 
a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py new file mode 100644 index 000000000..562c83af9 --- /dev/null +++ b/youtube_dl/extractor/byutv.py @@ -0,0 +1,92 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_duration + + +class BYUtvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?' + _TESTS = [{ + # ooyalaVOD + 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', + 'info_dict': { + 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH', + 'display_id': 'studio-c-season-5-episode-5', + 'ext': 'mp4', + 'title': 'Season 5 Episode 5', + 'description': 'md5:1d31dc18ef4f075b28f6a65937d22c65', + 'thumbnail': r're:^https?://.*', + 'duration': 1486.486, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + }, { + # dvr + 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2', + 'info_dict': { + 'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451', + 'display_id': 'byu-softball-pacific-vs-byu-41219---game-2', + 'ext': 'mp4', + 'title': 'Pacific vs. BYU (4/12/19)', + 'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3', + 'duration': 11645, + }, + 'params': { + 'skip_download': True + }, + }, { + 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', + 'only_matching': True, + }, { + 'url': 'https://www.byutv.org/player/27741493-dc83-40b0-8420-e7ae38a2ae98/byu-football-toledo-vs-byu-93016?listid=4fe0fee5-0d3c-4a29-b725-e4948627f472&listindex=0&q=toledo', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + info = self._download_json( + 'https://api.byutv.org/api3/catalog/getvideosforcontent', + display_id, query={ + 'contentid': video_id, + 'channel': 'byutv', + 'x-byutv-context': 'web$US', + }, headers={ + 'x-byutv-context': 'web$US', + 'x-byutv-platformkey': 'xsaaw9c7y5', + }) + + ep = info.get('ooyalaVOD') + if ep: + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ep['providerId'], + 'id': video_id, + 'display_id': display_id, + 'title': ep.get('title'), + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + } + + ep = info['dvr'] + title = ep['title'] + formats = self._extract_m3u8_formats( + ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + 'duration': parse_duration(ep.get('length')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py new file mode 100644 index 000000000..cac8fdcba --- /dev/null +++ b/youtube_dl/extractor/c56.py @@ -0,0 +1,65 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import js_to_json + + +class C56IE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)' + IE_NAME = '56.com' + _TESTS = [{ + 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', + 'md5': 'e59995ac63d0457783ea05f93f12a866', + 'info_dict': { + 'id': '93440716', + 'ext': 'flv', + 'title': '网事知多少 
第32期:车怒', + 'duration': 283.813, + }, + }, { + 'url': 'http://www.56.com/u47/v_MTM5NjQ5ODc2.html', + 'md5': '', + 'info_dict': { + 'id': '82247482', + 'title': '爱的诅咒之杜鹃花开', + }, + 'playlist_count': 7, + 'add_ie': ['Sohu'], + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + text_id = mobj.group('textid') + + webpage = self._download_webpage(url, text_id) + sohu_video_info_str = self._search_regex( + r'var\s+sohuVideoInfo\s*=\s*({[^}]+});', webpage, 'Sohu video info', default=None) + if sohu_video_info_str: + sohu_video_info = self._parse_json( + sohu_video_info_str, text_id, transform_source=js_to_json) + return self.url_result(sohu_video_info['url'], 'Sohu') + + page = self._download_json( + 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info') + + info = page['info'] + + formats = [ + { + 'format_id': f['type'], + 'filesize': int(f['filesize']), + 'url': f['url'] + } for f in info['rfiles'] + ] + self._sort_formats(formats) + + return { + 'id': info['vid'], + 'title': info['Subject'], + 'duration': int(info['duration']) / 1000.0, + 'formats': formats, + 'thumbnail': info.get('bimg') or info.get('img'), + } diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py new file mode 100644 index 000000000..8f0c6c545 --- /dev/null +++ b/youtube_dl/extractor/camdemy.py @@ -0,0 +1,161 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_urlencode, + compat_urlparse, +) +from ..utils import ( + clean_html, + parse_duration, + str_to_int, + unified_strdate, +) + + +class CamdemyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' + _TESTS = [{ + # single file + 'url': 'http://www.camdemy.com/media/5181/', + 'md5': '5a5562b6a98b37873119102e052e311b', + 'info_dict': { + 'id': '5181', + 'ext': 'mp4', + 'title': 'Ch1-1 Introduction, Signals (02-23-2012)', + 'thumbnail': r're:^https?://.*\.jpg$', + 'creator': 'ss11spring', + 'duration': 1591, + 'upload_date': '20130114', + 'view_count': int, + } + }, { + # With non-empty description + # webpage returns "No permission or not login" + 'url': 'http://www.camdemy.com/media/13885', + 'md5': '4576a3bb2581f86c61044822adbd1249', + 'info_dict': { + 'id': '13885', + 'ext': 'mp4', + 'title': 'EverCam + Camdemy QuickStart', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', + 'creator': 'evercam', + 'duration': 318, + } + }, { + # External source (YouTube) + 'url': 'http://www.camdemy.com/media/14842', + 'info_dict': { + 'id': '2vsYQzNIsJo', + 'ext': 'mp4', + 'title': 'Excel 2013 Tutorial - How to add Password Protection', + 'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', + 'upload_date': '20130211', + 'uploader': 'Hun Kim', + 'uploader_id': 'hunkimtutorials', + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + src_from = self._html_search_regex( + r"class=['\"]srcFrom['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P<url>(?:(?!\1).)+)\1", + webpage, 'external source', default=None, group='url') + if src_from: + return self.url_result(src_from) + + oembed_obj = self._download_json( + 'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) + + title = oembed_obj['title'] + thumb_url = 
oembed_obj['thumbnail_url'] + video_folder = compat_urlparse.urljoin(thumb_url, 'video/') + file_list_doc = self._download_xml( + compat_urlparse.urljoin(video_folder, 'fileList.xml'), + video_id, 'Downloading filelist XML') + file_name = file_list_doc.find('./video/item/fileName').text + video_url = compat_urlparse.urljoin(video_folder, file_name) + + # Some URLs return "No permission or not login" in a webpage despite being + # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885) + upload_date = unified_strdate(self._search_regex( + r'>published on ([^<]+)<', webpage, + 'upload date', default=None)) + view_count = str_to_int(self._search_regex( + r'role=["\']viewCnt["\'][^>]*>([\d,.]+) views', + webpage, 'view count', default=None)) + description = self._html_search_meta( + 'description', webpage, default=None) or clean_html( + oembed_obj.get('description')) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumb_url, + 'description': description, + 'creator': oembed_obj.get('author_name'), + 'duration': parse_duration(oembed_obj.get('duration')), + 'upload_date': upload_date, + 'view_count': view_count, + } + + +class CamdemyFolderIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)' + _TESTS = [{ + # links with trailing slash + 'url': 'http://www.camdemy.com/folder/450', + 'info_dict': { + 'id': '450', + 'title': '信號與系統 2012 & 2011 (Signals and Systems)', + }, + 'playlist_mincount': 145 + }, { + # links without trailing slash + # and multi-page + 'url': 'http://www.camdemy.com/folder/853', + 'info_dict': { + 'id': '853', + 'title': '科學計算 - 使用 Matlab' + }, + 'playlist_mincount': 20 + }, { + # with displayMode parameter, for testing the code that adds parameters + 'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', + 'info_dict': { + 'id': '853', + 'title': '科學計算 - 使用 Matlab' + }, + 'playlist_mincount': 20 + }] + + def _real_extract(self, url): + folder_id = self._match_id(url) + + # Add displayMode=list so that all links are displayed on a single page + parsed_url = list(compat_urlparse.urlparse(url)) + query = dict(compat_urlparse.parse_qsl(parsed_url[4])) + query.update({'displayMode': 'list'}) + parsed_url[4] = compat_urllib_parse_urlencode(query) + final_url = compat_urlparse.urlunparse(parsed_url) + + page = self._download_webpage(final_url, folder_id) + matches = re.findall(r"href='(/media/\d+/?)'", page) + + entries = [self.url_result('http://www.camdemy.com' + media_path) + for media_path in matches] + + folder_title = self._html_search_meta('keywords', page) + + return self.playlist_result(entries, folder_id, folder_title) diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dl/extractor/cammodels.py new file mode 100644 index 000000000..1eb81b75e --- /dev/null +++ b/youtube_dl/extractor/cammodels.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + url_or_none, +) + + +class CamModelsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cammodels\.com/cam/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.cammodels.com/cam/AutumnKnight/', + 'only_matching': True, + 'age_limit': 18 + }] + + def _real_extract(self, url): + user_id = self._match_id(url) + + webpage = self._download_webpage( + url, user_id, headers=self.geo_verification_headers()) + + manifest_root = self._html_search_regex( + r'manifestUrlRoot=([^&\']+)', webpage,
'manifest', default=None) + + if not manifest_root: + ERRORS = ( + ("I'm offline, but let's stay connected", 'This user is currently offline'), + ('in a private show', 'This user is in a private show'), + ('is currently performing LIVE', 'This model is currently performing live'), + ) + for pattern, message in ERRORS: + if pattern in webpage: + error = message + expected = True + break + else: + error = 'Unable to find manifest URL root' + expected = False + raise ExtractorError(error, expected=expected) + + manifest = self._download_json( + '%s%s.json' % (manifest_root, user_id), user_id) + + formats = [] + for format_id, format_dict in manifest['formats'].items(): + if not isinstance(format_dict, dict): + continue + encodings = format_dict.get('encodings') + if not isinstance(encodings, list): + continue + vcodec = format_dict.get('videoCodec') + acodec = format_dict.get('audioCodec') + for media in encodings: + if not isinstance(media, dict): + continue + media_url = url_or_none(media.get('location')) + if not media_url: + continue + + format_id_list = [format_id] + height = int_or_none(media.get('videoHeight')) + if height is not None: + format_id_list.append('%dp' % height) + f = { + 'url': media_url, + 'format_id': '-'.join(format_id_list), + 'width': int_or_none(media.get('videoWidth')), + 'height': height, + 'vbr': int_or_none(media.get('videoKbps')), + 'abr': int_or_none(media.get('audioKbps')), + 'fps': int_or_none(media.get('fps')), + 'vcodec': vcodec, + 'acodec': acodec, + } + if 'rtmp' in format_id: + f['ext'] = 'flv' + elif 'hls' in format_id: + f.update({ + 'ext': 'mp4', + # hls skips fragments, preferring rtmp + 'preference': -1, + }) + else: + continue + formats.append(f) + self._sort_formats(formats) + + return { + 'id': user_id, + 'title': self._live_title(user_id), + 'is_live': True, + 'formats': formats, + 'age_limit': 18 + } diff --git a/youtube_dl/extractor/camtube.py b/youtube_dl/extractor/camtube.py new file mode 100644 index 000000000..b3be3bdcf --- /dev/null +++ b/youtube_dl/extractor/camtube.py @@ -0,0 +1,71 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + unified_timestamp, +) + + +class CamTubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|api)\.)?camtube\.co/recordings?/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://camtube.co/recording/minafay-030618-1136-chaturbate-female', + 'info_dict': { + 'id': '42ad3956-dd5b-445a-8313-803ea6079fac', + 'display_id': 'minafay-030618-1136-chaturbate-female', + 'ext': 'mp4', + 'title': 'minafay-030618-1136-chaturbate-female', + 'duration': 1274, + 'timestamp': 1528018608, + 'upload_date': '20180603', + 'age_limit': 18 + }, + 'params': { + 'skip_download': True, + }, + }] + + _API_BASE = 'https://api.camtube.co' + + def _real_extract(self, url): + display_id = self._match_id(url) + + token = self._download_json( + '%s/rpc/session/new' % self._API_BASE, display_id, + 'Downloading session token')['token'] + + self._set_cookie('api.camtube.co', 'session', token) + + video = self._download_json( + '%s/recordings/%s' % (self._API_BASE, display_id), display_id, + headers={'Referer': url}) + + video_id = video['uuid'] + timestamp = unified_timestamp(video.get('createdAt')) + duration = int_or_none(video.get('duration')) + view_count = int_or_none(video.get('viewCount')) + like_count = int_or_none(video.get('likeCount')) + creator = video.get('stageName') + + formats = [{ + 'url': '%s/recordings/%s/manifest.m3u8' + % (self._API_BASE, 
video_id), + 'format_id': 'hls', + 'ext': 'mp4', + 'protocol': 'm3u8_native', + }] + + return { + 'id': video_id, + 'display_id': display_id, + 'title': display_id, + 'timestamp': timestamp, + 'duration': duration, + 'view_count': view_count, + 'like_count': like_count, + 'creator': creator, + 'formats': formats, + 'age_limit': 18 + } diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dl/extractor/camwithher.py new file mode 100644 index 000000000..bbc5205fd --- /dev/null +++ b/youtube_dl/extractor/camwithher.py @@ -0,0 +1,89 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, + unified_strdate, +) + + +class CamWithHerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?camwithher\.tv/view_video\.php\?.*\bviewkey=(?P<id>\w+)' + + _TESTS = [{ + 'url': 'http://camwithher.tv/view_video.php?viewkey=6e9a24e2c0e842e1f177&page=&viewtype=&category=', + 'info_dict': { + 'id': '5644', + 'ext': 'flv', + 'title': 'Periscope Tease', + 'description': 'In the clouds teasing on periscope to my favorite song', + 'duration': 240, + 'view_count': int, + 'comment_count': int, + 'uploader': 'MileenaK', + 'upload_date': '20160322', + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'http://camwithher.tv/view_video.php?viewkey=6dfd8b7c97531a459937', + 'only_matching': True, + }, { + 'url': 'http://camwithher.tv/view_video.php?page=&viewkey=6e9a24e2c0e842e1f177&viewtype=&category=', + 'only_matching': True, + }, { + 'url': 'http://camwithher.tv/view_video.php?viewkey=b6c3b5bea9515d1a1fc4&page=&viewtype=&category=mv', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + flv_id = self._html_search_regex( + r'<a[^>]+href=["\']/download/\?v=(\d+)', webpage, 'video id') + + # Video URL construction algorithm is reverse-engineered from cwhplayer.swf + rtmp_url = 'rtmp://camwithher.tv/clipshare/%s' % ( + ('mp4:%s.mp4' % flv_id) if int(flv_id) > 2010 else flv_id) + + title = self._html_search_regex( + r'<div[^>]+style="float:left"[^>]*>\s*<h2>(.+?)</h2>', webpage, 'title') + description = self._html_search_regex( + r'>Description:</span>(.+?)</div>', webpage, 'description', default=None) + + runtime = self._search_regex( + r'Runtime\s*:\s*(.+?) 
\|', webpage, 'duration', default=None) + if runtime: + runtime = re.sub(r'[\s-]', '', runtime) + duration = parse_duration(runtime) + view_count = int_or_none(self._search_regex( + r'Views\s*:\s*(\d+)', webpage, 'view count', default=None)) + comment_count = int_or_none(self._search_regex( + r'Comments\s*:\s*(\d+)', webpage, 'comment count', default=None)) + + uploader = self._search_regex( + r'Added by\s*:\s*<a[^>]+>([^<]+)</a>', webpage, 'uploader', default=None) + upload_date = unified_strdate(self._search_regex( + r'Added on\s*:\s*([\d-]+)', webpage, 'upload date', default=None)) + + return { + 'id': flv_id, + 'url': rtmp_url, + 'ext': 'flv', + 'no_resume': True, + 'title': title, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'comment_count': comment_count, + 'uploader': uploader, + 'upload_date': upload_date, + 'age_limit': 18 + } diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dl/extractor/canalc2.py new file mode 100644 index 000000000..407cc8084 --- /dev/null +++ b/youtube_dl/extractor/canalc2.py @@ -0,0 +1,73 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import parse_duration + + +class Canalc2IE(InfoExtractor): + IE_NAME = 'canalc2.tv' + _VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://www.canalc2.tv/video/12163', + 'md5': '060158428b650f896c542dfbb3d6487f', + 'info_dict': { + 'id': '12163', + 'ext': 'mp4', + 'title': 'Terrasses du Numérique', + 'duration': 122, + }, + }, { + 'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://www.canalc2.tv/video/%s' % video_id, video_id) + + title = self._html_search_regex( + r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>', + webpage, 'title') + + formats = [] + for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage): + if video_url.startswith('rtmp://'): + rtmp = re.search( + r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url) + formats.append({ + 'url': rtmp.group('url'), + 'format_id': 'rtmp', + 'ext': 'flv', + 'app': rtmp.group('app'), + 'play_path': rtmp.group('play_path'), + 'page_url': url, + }) + else: + formats.append({ + 'url': video_url, + 'format_id': 'http', + }) + + if formats: + info = { + 'formats': formats, + } + else: + info = self._parse_html5_media_entries(url, webpage, url)[0] + + self._sort_formats(info['formats']) + + info.update({ + 'id': video_id, + 'title': title, + 'duration': parse_duration(self._search_regex( + r'id=["\']video_duree["\'][^>]*>([^<]+)', + webpage, 'duration', fatal=False)), + }) + return info diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py new file mode 100644 index 000000000..51c11cb7e --- /dev/null +++ b/youtube_dl/extractor/canalplus.py @@ -0,0 +1,116 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + # ExtractorError, + # HEADRequest, + int_or_none, + qualities, + unified_strdate, +) + + +class CanalplusIE(InfoExtractor): + IE_DESC = 'mycanal.fr and piwiplus.fr' + _VALID_URL = r'https?://(?:www\.)?(?P<site>mycanal|piwiplus)\.fr/(?:[^/]+/)*(?P<display_id>[^?/]+)(?:\.html\?.*\bvid=|/p/)(?P<id>\d+)' + _VIDEO_INFO_TEMPLATE = 
'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s?format=json' + _SITE_ID_MAP = { + 'mycanal': 'cplus', + 'piwiplus': 'teletoon', + } + + # Only works for direct mp4 URLs + _GEO_COUNTRIES = ['FR'] + + _TESTS = [{ + 'url': 'https://www.mycanal.fr/d17-emissions/lolywood/p/1397061', + 'info_dict': { + 'id': '1397061', + 'display_id': 'lolywood', + 'ext': 'mp4', + 'title': 'Euro 2016 : Je préfère te prévenir - Lolywood - Episode 34', + 'description': 'md5:7d97039d455cb29cdba0d652a0efaa5e', + 'upload_date': '20160602', + }, + }, { + # geo restricted, bypassed + 'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190', + 'info_dict': { + 'id': '1108190', + 'display_id': 'pid1405-le-labyrinthe-boing-super-ranger', + 'ext': 'mp4', + 'title': 'BOING SUPER RANGER - Ep : Le labyrinthe', + 'description': 'md5:4cea7a37153be42c1ba2c1d3064376ff', + 'upload_date': '20140724', + }, + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }] + + def _real_extract(self, url): + site, display_id, video_id = re.match(self._VALID_URL, url).groups() + + site_id = self._SITE_ID_MAP[site] + + info_url = self._VIDEO_INFO_TEMPLATE % (site_id, video_id) + video_data = self._download_json(info_url, video_id, 'Downloading video JSON') + + if isinstance(video_data, list): + video_data = [video for video in video_data if video.get('ID') == video_id][0] + media = video_data['MEDIA'] + infos = video_data['INFOS'] + + preference = qualities(['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD']) + + # _, fmt_url = next(iter(media['VIDEOS'].items())) + # if '/geo' in fmt_url.lower(): + # response = self._request_webpage( + # HEADRequest(fmt_url), video_id, + # 'Checking if the video is georestricted') + # if '/blocage' in response.geturl(): + # raise ExtractorError( + # 'The video is not available in your country', + # expected=True) + + formats = [] + for format_id, format_url in media['VIDEOS'].items(): + if not format_url: + continue + if format_id == 'HLS': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) + elif format_id == 'HDS': + formats.extend(self._extract_f4m_formats( + format_url + '?hdcore=2.11.3', video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + # the secret extracted from ya function in http://player.canalplus.fr/common/js/canalPlayer.js + 'url': format_url + '?secret=pqzerjlsmdkjfoiuerhsdlfknaes', + 'format_id': format_id, + 'preference': preference(format_id), + }) + self._sort_formats(formats) + + thumbnails = [{ + 'id': image_id, + 'url': image_url, + } for image_id, image_url in media.get('images', {}).items()] + + titrage = infos['TITRAGE'] + + return { + 'id': video_id, + 'display_id': display_id, + 'title': '%s - %s' % (titrage['TITRE'], + titrage['SOUS_TITRE']), + 'upload_date': unified_strdate(infos.get('PUBLICATION', {}).get('DATE')), + 'thumbnails': thumbnails, + 'description': infos.get('DESCRIPTION'), + 'duration': int_or_none(infos.get('DURATION')), + 'view_count': int_or_none(infos.get('NB_VUES')), + 'like_count': int_or_none(infos.get('NB_LIKES')), + 'comment_count': int_or_none(infos.get('NB_COMMENTS')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py new file mode 100644 index 000000000..c506bc5dd --- /dev/null +++ b/youtube_dl/extractor/canvas.py @@ -0,0 +1,319 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from .gigya import 
GigyaBaseIE +from ..compat import compat_HTTPError +from ..utils import ( + ExtractorError, + strip_or_none, + float_or_none, + int_or_none, + merge_dicts, + parse_iso8601, +) + + +class CanvasIE(InfoExtractor): + _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'md5': '90139b746a0a9bd7bb631283f6e2a64e', + 'info_dict': { + 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', + 'ext': 'flv', + 'title': 'Nachtwacht: De Greystook', + 'description': 'md5:1db3f5dc4c7109c821261e7512975be7', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1468.03, + }, + 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'], + }, { + 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', + 'only_matching': True, + }] + _HLS_ENTRY_PROTOCOLS_MAP = { + 'HLS': 'm3u8_native', + 'HLS_AES': 'm3u8', + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + site_id, video_id = mobj.group('site_id'), mobj.group('id') + + data = self._download_json( + 'https://mediazone.vrt.be/api/v1/%s/assets/%s' + % (site_id, video_id), video_id) + + title = data['title'] + description = data.get('description') + + formats = [] + for target in data['targetUrls']: + format_url, format_type = target.get('url'), target.get('type') + if not format_url or not format_type: + continue + if format_type in self._HLS_ENTRY_PROTOCOLS_MAP: + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type], + m3u8_id=format_type, fatal=False)) + elif format_type == 'HDS': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_type, fatal=False)) + elif format_type == 'MPEG_DASH': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id=format_type, fatal=False)) + elif format_type == 'HSS': + formats.extend(self._extract_ism_formats( + format_url, video_id, ism_id='mss', fatal=False)) + else: + formats.append({ + 'format_id': format_type, + 'url': format_url, + }) + self._sort_formats(formats) + + subtitles = {} + subtitle_urls = data.get('subtitleUrls') + if isinstance(subtitle_urls, list): + for subtitle in subtitle_urls: + subtitle_url = subtitle.get('url') + if subtitle_url and subtitle.get('type') == 'CLOSED': + subtitles.setdefault('nl', []).append({'url': subtitle_url}) + + return { + 'id': video_id, + 'display_id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + 'duration': float_or_none(data.get('duration'), 1000), + 'thumbnail': data.get('posterImageUrl'), + 'subtitles': subtitles, + } + + +class CanvasEenIE(InfoExtractor): + IE_DESC = 'canvas.be and een.be' + _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', + 'md5': 'ed66976748d12350b118455979cca293', + 'info_dict': { + 'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', + 'display_id': 'de-afspraak-veilt-voor-de-warmste-week', + 'ext': 'flv', + 'title': 'De afspraak veilt voor de Warmste Week', + 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 49.02, + }, + 'expected_warnings': ['is not a 
supported codec'], + }, { + # with subtitles + 'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167', + 'info_dict': { + 'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625', + 'display_id': 'pieter-0167', + 'ext': 'mp4', + 'title': 'Pieter 0167', + 'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2553.08, + 'subtitles': { + 'nl': [{ + 'ext': 'vtt', + }], + }, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Pagina niet gevonden', + }, { + 'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles', + 'info_dict': { + 'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f', + 'display_id': 'herbekijk-sorry-voor-alles', + 'ext': 'mp4', + 'title': 'Herbekijk Sorry voor alles', + 'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3788.06, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Episode no longer available', + }, { + 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + site_id, display_id = mobj.group('site_id'), mobj.group('id') + + webpage = self._download_webpage(url, display_id) + + title = strip_or_none(self._search_regex( + r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>', + webpage, 'title', default=None) or self._og_search_title( + webpage, default=None)) + + video_id = self._html_search_regex( + r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', + group='id') + + return { + '_type': 'url_transparent', + 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id), + 'ie_key': CanvasIE.ie_key(), + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': self._og_search_description(webpage), + } + + +class VrtNUIE(GigyaBaseIE): + IE_DESC = 'VrtNU.be' + _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/', + 'info_dict': { + 'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', + 'ext': 'flv', + 'title': 'De zwarte weduwe', + 'description': 'md5:d90c21dced7db869a85db89a623998d4', + 'duration': 1457.04, + 'thumbnail': r're:^https?://.*\.jpg$', + 'season': '1', + 'season_number': 1, + 'episode_number': 1, + }, + 'skip': 'This video is only available for registered users' + }] + _NETRC_MACHINE = 'vrtnu' + _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy' + _CONTEXT_ID = 'R3595707040' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + auth_data = { + 'APIKey': self._APIKEY, + 'targetEnv': 'jssdk', + 'loginID': username, + 'password': password, + 'authMode': 'cookie', + } + + auth_info = self._gigya_login(auth_data) + + # Sometimes authentication fails for no good reason, retry + login_attempt = 1 + while login_attempt <= 3: + try: + # When requesting a token, no actual token is returned, but the + # necessary cookies are set. 
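+                # The response body of this request is ignored; only its
+                # Set-Cookie headers matter. On HTTP 401 the surrounding loop
+                # makes up to three attempts in total before silently giving
+                # up; any other error is re-raised.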
+ self._request_webpage( + 'https://token.vrt.be', + None, note='Requesting a token', errnote='Could not get a token', + headers={ + 'Content-Type': 'application/json', + 'Referer': 'https://www.vrt.be/vrtnu/', + }, + data=json.dumps({ + 'uid': auth_info['UID'], + 'uidsig': auth_info['UIDSignature'], + 'ts': auth_info['signatureTimestamp'], + 'email': auth_info['profile']['email'], + }).encode('utf-8')) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + login_attempt += 1 + self.report_warning('Authentication failed') + self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again') + else: + raise e + else: + break + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage, urlh = self._download_webpage_handle(url, display_id) + + info = self._search_json_ld(webpage, display_id, default={}) + + # title is optional here since it may be extracted by extractor + # that is delegated from here + title = strip_or_none(self._html_search_regex( + r'(?ms)<h1 class="content__heading">(.+?)</h1>', + webpage, 'title', default=None)) + + description = self._html_search_regex( + r'(?ms)<div class="content__description">(.+?)</div>', + webpage, 'description', default=None) + + season = self._html_search_regex( + [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s* + <span>seizoen\ (.+?)</span>\s* + </div>''', + r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'], + webpage, 'season', default=None) + + season_number = int_or_none(season) + + episode_number = int_or_none(self._html_search_regex( + r'''(?xms)<div\ class="content__episode">\s* + <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span> + </div>''', + webpage, 'episode_number', default=None)) + + release_date = parse_iso8601(self._html_search_regex( + r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"', + webpage, 'release_date', default=None)) + + # If there's a ? 
or a # in the URL, remove them and everything after + clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/') + securevideo_url = clean_url + '.mssecurevideo.json' + + try: + video = self._download_json(securevideo_url, display_id) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + self.raise_login_required() + raise + + # We are dealing with a '../<show>.relevant' URL + redirect_url = video.get('url') + if redirect_url: + return self.url_result(self._proto_relative_url(redirect_url, 'https:')) + + # There is only one entry, but with an unknown key, so just get + # the first one + video_id = list(video.values())[0].get('videoid') + + return merge_dicts(info, { + '_type': 'url_transparent', + 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id, + 'ie_key': CanvasIE.ie_key(), + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'season': season, + 'season_number': season_number, + 'episode_number': episode_number, + 'release_date': release_date, + }) diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dl/extractor/carambatv.py new file mode 100644 index 000000000..b57b86af7 --- /dev/null +++ b/youtube_dl/extractor/carambatv.py @@ -0,0 +1,108 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + float_or_none, + int_or_none, + try_get, +) + +from .videomore import VideomoreIE + + +class CarambaTVIE(InfoExtractor): + _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://video1.carambatv.ru/v/191910501', + 'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a', + 'info_dict': { + 'id': '191910501', + 'ext': 'mp4', + 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 2678.31, + }, + }, { + 'url': 'carambatv:191910501', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id, + video_id) + + title = video['title'] + + base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id + + formats = [{ + 'url': base_url + f['fn'], + 'height': int_or_none(f.get('height')), + 'format_id': '%sp' % f['height'] if f.get('height') else None, + } for f in video['qualities'] if f.get('fn')] + self._sort_formats(formats) + + thumbnail = video.get('splash') + duration = float_or_none(try_get( + video, lambda x: x['annotations'][0]['end_time'], compat_str)) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } + + +class CarambaTVPageIE(InfoExtractor): + _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/', + 'md5': 'a49fb0ec2ad66503eeb46aac237d3c86', + 'info_dict': { + 'id': '475222', + 'ext': 'flv', + 'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)', + 'thumbnail': r're:^https?://.*\.jpg', + # duration reported by videomore is incorrect + 'duration': int, + }, + 'add_ie': [VideomoreIE.ie_key()], + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + videomore_url = VideomoreIE._extract_url(webpage) + if not videomore_url: + videomore_id = self._search_regex( + 
r'getVMCode\s*\(\s*["\']?(\d+)', webpage, 'videomore id', + default=None) + if videomore_id: + videomore_url = 'videomore:%s' % videomore_id + if videomore_url: + title = self._og_search_title(webpage) + return { + '_type': 'url_transparent', + 'url': videomore_url, + 'ie_key': VideomoreIE.ie_key(), + 'title': title, + } + + video_url = self._og_search_property('video:iframe', webpage, default=None) + + if not video_url: + video_id = self._search_regex( + r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)', + webpage, 'video id') + video_url = 'carambatv:%s' % video_id + + return self.url_result(video_url, CarambaTVIE.ie_key()) diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dl/extractor/cartoonnetwork.py new file mode 100644 index 000000000..48b33617f --- /dev/null +++ b/youtube_dl/extractor/cartoonnetwork.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .turner import TurnerBaseIE +from ..utils import int_or_none + + +class CartoonNetworkIE(TurnerBaseIE): + _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html' + _TEST = { + 'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html', + 'info_dict': { + 'id': '6e3375097f63874ebccec7ef677c1c3845fa850e', + 'ext': 'mp4', + 'title': 'How to Draw Upgrade', + 'description': 'md5:2061d83776db7e8be4879684eefe8c0f', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False): + metadata_re = '' + if content_re: + metadata_re = r'|video_metadata\.content_' + content_re + return self._search_regex( + r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re), + webpage, name, fatal=fatal) + + media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True) + title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True) + + info = self._extract_ngtv_info( + media_id, {'networkId': 'cartoonnetwork'}, { + 'url': url, + 'site_name': 'CartoonNetwork', + 'auth_required': find_field('authType', 'auth type') != 'unauth', + }) + + series = find_field( + 'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage) + info.update({ + 'id': media_id, + 'display_id': display_id, + 'title': title, + 'description': self._html_search_meta('description', webpage), + 'series': series, + 'episode': title, + }) + + for field in ('season', 'episode'): + field_name = field + 'Number' + info[field + '_number'] = int_or_none(find_field( + field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage)) + + return info diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py new file mode 100644 index 000000000..751a3a8f2 --- /dev/null +++ b/youtube_dl/extractor/cbc.py @@ -0,0 +1,457 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_HTTPError, +) +from ..utils import ( + js_to_json, + smuggle_url, + try_get, + xpath_text, + xpath_element, + xpath_with_ns, + find_xpath_attr, + orderedSet, + parse_duration, + parse_iso8601, + parse_age_limit, + strip_or_none, + int_or_none, + ExtractorError, +) + + +class CBCIE(InfoExtractor): + IE_NAME = 'cbc.ca' + _VALID_URL = 
r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' + _TESTS = [{ + # with mediaId + 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', + 'md5': '97e24d09672fc4cf56256d6faa6c25bc', + 'info_dict': { + 'id': '2682904050', + 'ext': 'mp4', + 'title': 'Don Cherry – All-Stars', + 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.', + 'timestamp': 1454463000, + 'upload_date': '20160203', + 'uploader': 'CBCC-NEW', + }, + 'skip': 'Geo-restricted to Canada', + }, { + # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com + 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4', + 'md5': '162adfa070274b144f4fdc3c3b8207db', + 'info_dict': { + 'id': '2414435309', + 'ext': 'mp4', + 'title': '22 Minutes Update: What Not To Wear Quebec', + 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.", + 'upload_date': '20131025', + 'uploader': 'CBCC-NEW', + 'timestamp': 1382717907, + }, + }, { + # with clipId, feed only available via tpfeed.cbc.ca + 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', + 'md5': '0274a90b51a9b4971fe005c63f592f12', + 'info_dict': { + 'id': '2487345465', + 'ext': 'mp4', + 'title': 'Robin Williams freestyles on 90 Minutes Live', + 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.', + 'upload_date': '19780210', + 'uploader': 'CBCC-NEW', + 'timestamp': 255977160, + }, + }, { + # multiple iframes + 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', + 'playlist': [{ + 'md5': '377572d0b49c4ce0c9ad77470e0b96b4', + 'info_dict': { + 'id': '2680832926', + 'ext': 'mp4', + 'title': 'An Eagle\'s-Eye View Off Burrard Bridge', + 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.', + 'upload_date': '20160201', + 'timestamp': 1454342820, + 'uploader': 'CBCC-NEW', + }, + }, { + 'md5': '415a0e3f586113894174dfb31aa5bb1a', + 'info_dict': { + 'id': '2658915080', + 'ext': 'mp4', + 'title': 'Fly like an eagle!', + 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower', + 'upload_date': '20150315', + 'timestamp': 1426443984, + 'uploader': 'CBCC-NEW', + }, + }], + 'skip': 'Geo-restricted to Canada', + }, { + # multiple CBC.APP.Caffeine.initInstance(...) 
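+        # (such pages embed several players; each CBC.APP.Caffeine.initInstance({...})
+        # argument is a JS object literal that _extract_player_init below feeds
+        # through js_to_json, e.g. initInstance({mediaId: "2682904050"}), and the
+        # resolved mediaId is then delegated to CBCPlayerIE as cbcplayer:2682904050)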
+ 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', + 'info_dict': { + 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', + 'id': 'dog-indoor-exercise-winter-1.3928238', + 'description': 'md5:c18552e41726ee95bd75210d1ca9194c', + }, + 'playlist_mincount': 6, + }] + + @classmethod + def suitable(cls, url): + return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url) + + def _extract_player_init(self, player_init, display_id): + player_info = self._parse_json(player_init, display_id, js_to_json) + media_id = player_info.get('mediaId') + if not media_id: + clip_id = player_info['clipId'] + feed = self._download_json( + 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id, + clip_id, fatal=False) + if feed: + media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str) + if not media_id: + media_id = self._download_json( + 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id, + clip_id)['entries'][0]['id'].split('/')[-1] + return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + title = self._og_search_title(webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', default=None) or self._html_search_regex( + r'<title>([^<]+)', webpage, 'title', fatal=False) + entries = [ + self._extract_player_init(player_init, display_id) + for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)] + media_ids = [] + for media_id_re in ( + r']+src="[^"]+?mediaId=(\d+)"', + r']+\bid=["\']player-(\d+)', + r'guid["\']\s*:\s*["\'](\d+)'): + media_ids.extend(re.findall(media_id_re, webpage)) + entries.extend([ + self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id) + for media_id in orderedSet(media_ids)]) + return self.playlist_result( + entries, display_id, strip_or_none(title), + self._og_search_description(webpage)) + + +class CBCPlayerIE(InfoExtractor): + IE_NAME = 'cbc.ca:player' + _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P\d+)' + _TESTS = [{ + 'url': 'http://www.cbc.ca/player/play/2683190193', + 'md5': '64d25f841ddf4ddb28a235338af32e2c', + 'info_dict': { + 'id': '2683190193', + 'ext': 'mp4', + 'title': 'Gerry Runs a Sweat Shop', + 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0', + 'timestamp': 1455071400, + 'upload_date': '20160210', + 'uploader': 'CBCC-NEW', + }, + 'skip': 'Geo-restricted to Canada', + }, { + # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ + 'url': 'http://www.cbc.ca/player/play/2657631896', + 'md5': 'e5e708c34ae6fca156aafe17c43e8b75', + 'info_dict': { + 'id': '2657631896', + 'ext': 'mp3', + 'title': 'CBC Montreal is organizing its first ever community hackathon!', + 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. 
Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.', + 'timestamp': 1425704400, + 'upload_date': '20150307', + 'uploader': 'CBCC-NEW', + }, + }, { + 'url': 'http://www.cbc.ca/player/play/2164402062', + 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6', + 'info_dict': { + 'id': '2164402062', + 'ext': 'mp4', + 'title': 'Cancer survivor four times over', + 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.', + 'timestamp': 1320410746, + 'upload_date': '20111104', + 'uploader': 'CBCC-NEW', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, { + 'force_smil_url': True + }), + 'id': video_id, + } + + +class CBCWatchBaseIE(InfoExtractor): + _device_id = None + _device_token = None + _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/' + _NS_MAP = { + 'media': 'http://search.yahoo.com/mrss/', + 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/', + } + _GEO_COUNTRIES = ['CA'] + + def _call_api(self, path, video_id): + url = path if path.startswith('http') else self._API_BASE_URL + path + for _ in range(2): + try: + result = self._download_xml(url, video_id, headers={ + 'X-Clearleap-DeviceId': self._device_id, + 'X-Clearleap-DeviceToken': self._device_token, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + # Device token has expired, re-acquiring device token + self._register_device() + continue + raise + error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage') + if error_message: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message)) + return result + + def _real_initialize(self): + if self._valid_device_token(): + return + device = self._downloader.cache.load('cbcwatch', 'device') or {} + self._device_id, self._device_token = device.get('id'), device.get('token') + if self._valid_device_token(): + return + self._register_device() + + def _valid_device_token(self): + return self._device_id and self._device_token + + def _register_device(self): + self._device_id = self._device_token = None + result = self._download_xml( + self._API_BASE_URL + 'device/register', + None, 'Acquiring device token', + data=b'web') + self._device_id = xpath_text(result, 'deviceId', fatal=True) + self._device_token = xpath_text(result, 'deviceToken', fatal=True) + self._downloader.cache.store( + 'cbcwatch', 'device', { + 'id': self._device_id, + 'token': self._device_token, + }) + + def _parse_rss_feed(self, rss): + channel = xpath_element(rss, 'channel', fatal=True) + + def _add_ns(path): + return xpath_with_ns(path, self._NS_MAP) + + entries = [] + for item in channel.findall('item'): + guid = xpath_text(item, 'guid', fatal=True) + title = xpath_text(item, 'title', fatal=True) + + media_group = xpath_element(item, _add_ns('media:group'), fatal=True) + content = xpath_element(media_group, _add_ns('media:content'), fatal=True) + content_url = content.attrib['url'] + + thumbnails = [] + for thumbnail in media_group.findall(_add_ns('media:thumbnail')): + thumbnail_url = thumbnail.get('url') + if not thumbnail_url: + continue + thumbnails.append({ + 'id': thumbnail.get('profile'), + 'url': thumbnail_url, + 'width': int_or_none(thumbnail.get('width')), + 'height': 
int_or_none(thumbnail.get('height')), + }) + + timestamp = None + release_date = find_xpath_attr( + item, _add_ns('media:credit'), 'role', 'releaseDate') + if release_date is not None: + timestamp = parse_iso8601(release_date.text) + + entries.append({ + '_type': 'url_transparent', + 'url': content_url, + 'id': guid, + 'title': title, + 'description': xpath_text(item, 'description'), + 'timestamp': timestamp, + 'duration': int_or_none(content.get('duration')), + 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))), + 'episode': xpath_text(item, _add_ns('clearleap:episode')), + 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))), + 'series': xpath_text(item, _add_ns('clearleap:series')), + 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))), + 'thumbnails': thumbnails, + 'ie_key': 'CBCWatchVideo', + }) + + return self.playlist_result( + entries, xpath_text(channel, 'guid'), + xpath_text(channel, 'title'), + xpath_text(channel, 'description')) + + +class CBCWatchVideoIE(CBCWatchBaseIE): + IE_NAME = 'cbc.ca:watch:video' + _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TEST = { + # geo-restricted to Canada, bypassable + 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235', + 'only_matching': True, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + result = self._call_api(url, video_id) + + m3u8_url = xpath_text(result, 'url', fatal=True) + formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False) + if len(formats) < 2: + formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + for f in formats: + format_id = f.get('format_id') + if format_id.startswith('AAC'): + f['acodec'] = 'aac' + elif format_id.startswith('AC3'): + f['acodec'] = 'ac-3' + self._sort_formats(formats) + + info = { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + rss = xpath_element(result, 'rss') + if rss: + info.update(self._parse_rss_feed(rss)['entries'][0]) + del info['url'] + del info['_type'] + del info['ie_key'] + return info + + +class CBCWatchIE(CBCWatchBaseIE): + IE_NAME = 'cbc.ca:watch' + _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P[0-9a-f-]+)' + _TESTS = [{ + # geo-restricted to Canada, bypassable + 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4', + 'info_dict': { + 'id': '9673749a-5e77-484c-8b62-a1092a6b5168', + 'ext': 'mp4', + 'title': 'Customer (Dis)Service', + 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87', + 'upload_date': '20160219', + 'timestamp': 1455840000, + }, + 'params': { + # m3u8 download + 'skip_download': True, + 'format': 'bestvideo', + }, + }, { + # geo-restricted to Canada, bypassable + 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057', + 'info_dict': { + 'id': '1ed4b385-cd84-49cf-95f0-80f004680057', + 'title': 'Arthur', + 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.', + }, + 'playlist_mincount': 30, + }, { + 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) 
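+        # A sketch of the flow, assuming the Clearleap API wrapped by _call_api
+        # above: GET web/browse/<video_id> returns an RSS document; each <item>
+        # in it becomes a url_transparent entry handled by CBCWatchVideoIE, so a
+        # show URL (e.g. the Arthur test above) expands into an episode playlist.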
+ rss = self._call_api('web/browse/' + video_id, video_id) + return self._parse_rss_feed(rss) + + +class CBCOlympicsIE(InfoExtractor): + IE_NAME = 'cbc.ca:olympics' + _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._hidden_inputs(webpage)['videoId'] + video_doc = self._download_xml( + 'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id) + title = xpath_text(video_doc, 'title', fatal=True) + is_live = xpath_text(video_doc, 'kind') == 'Live' + if is_live: + title = self._live_title(title) + + formats = [] + for video_source in video_doc.findall('videoSources/videoSource'): + uri = xpath_text(video_source, 'uri') + if not uri: + continue + tokenize = self._download_json( + 'https://olympics.cbc.ca/api/api-akamai/tokenize', + video_id, data=json.dumps({ + 'VideoSource': uri, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Referer': url, + # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js + 'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie + }, fatal=False) + if not tokenize: + continue + content_url = tokenize['ContentUrl'] + video_source_format = video_source.get('format') + if video_source_format == 'IIS': + formats.extend(self._extract_ism_formats( + content_url, video_id, ism_id=video_source_format, fatal=False)) + else: + formats.extend(self._extract_m3u8_formats( + content_url, video_id, 'mp4', + 'm3u8' if is_live else 'm3u8_native', + m3u8_id=video_source_format, fatal=False)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': xpath_text(video_doc, 'description'), + 'thumbnail': xpath_text(video_doc, 'thumbnailUrl'), + 'duration': parse_duration(xpath_text(video_doc, 'duration')), + 'formats': formats, + 'is_live': is_live, + } diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py new file mode 100644 index 000000000..4a19a73d2 --- /dev/null +++ b/youtube_dl/extractor/cbs.py @@ -0,0 +1,112 @@ +from __future__ import unicode_literals + +from .theplatform import ThePlatformFeedIE +from ..utils import ( + ExtractorError, + int_or_none, + find_xpath_attr, + xpath_element, + xpath_text, + update_url_query, +) + + +class CBSBaseIE(ThePlatformFeedIE): + def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): + subtitles = {} + for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]: + cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k) + if cc_e is not None: + cc_url = cc_e.get('value') + if cc_url: + subtitles.setdefault(subtitles_lang, []).append({ + 'ext': ext, + 'url': cc_url, + }) + return subtitles + + +class CBSIE(CBSBaseIE): + _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P[\w-]+)' + + _TESTS = [{ + 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', + 'info_dict': { + 'id': '_u7W953k6la293J7EPTd9oHkSPs6Xn6_', + 'ext': 'mp4', + 'title': 'Connect Chat feat. Garth Brooks', + 'description': 'Connect with country music singer Garth Brooks, as he chats with fans on Wednesday November 27, 2013. 
Be sure to tune in to Garth Brooks: Live from Las Vegas, Friday November 29, at 9/8c on CBS!', + 'duration': 1495, + 'timestamp': 1385585425, + 'upload_date': '20131127', + 'uploader': 'CBSI-NEW', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + '_skip': 'Blocked outside the US', + }, { + 'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', + 'only_matching': True, + }, { + 'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/', + 'only_matching': True, + }] + + def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517): + items_data = self._download_xml( + 'http://can.cbs.com/thunder/player/videoPlayerService.php', + content_id, query={'partner': site, 'contentId': content_id}) + video_data = xpath_element(items_data, './/item') + title = xpath_text(video_data, 'videoTitle', 'title', True) + tp_path = 'dJ5BDC/media/guid/%d/%s' % (mpx_acc, content_id) + tp_release_url = 'http://link.theplatform.com/s/' + tp_path + + asset_types = [] + subtitles = {} + formats = [] + last_e = None + for item in items_data.findall('.//item'): + asset_type = xpath_text(item, 'assetType') + if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type: + continue + asset_types.append(asset_type) + query = { + 'mbr': 'true', + 'assetTypes': asset_type, + } + if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'): + query['formats'] = 'MPEG4,M3U' + elif asset_type in ('RTMP', 'WIFI', '3G'): + query['formats'] = 'MPEG4,FLV' + try: + tp_formats, tp_subtitles = self._extract_theplatform_smil( + update_url_query(tp_release_url, query), content_id, + 'Downloading %s SMIL data' % asset_type) + except ExtractorError as e: + last_e = e + continue + formats.extend(tp_formats) + subtitles = self._merge_subtitles(subtitles, tp_subtitles) + if last_e and not formats: + raise last_e + self._sort_formats(formats) + + info = self._extract_theplatform_metadata(tp_path, content_id) + info.update({ + 'id': content_id, + 'title': title, + 'series': xpath_text(video_data, 'seriesTitle'), + 'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), + 'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), + 'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), + 'thumbnail': xpath_text(video_data, 'previewImageURL'), + 'formats': formats, + 'subtitles': subtitles, + }) + return info + + def _real_extract(self, url): + content_id = self._match_id(url) + return self._extract_video_info(content_id) diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dl/extractor/cbsinteractive.py new file mode 100644 index 000000000..6596e98a6 --- /dev/null +++ b/youtube_dl/extractor/cbsinteractive.py @@ -0,0 +1,103 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .cbs import CBSIE +from ..utils import int_or_none + + +class CBSInteractiveIE(CBSIE): + _VALID_URL = r'https?://(?:www\.)?(?Pcnet|zdnet)\.com/(?:videos|video(?:/share)?)/(?P[^/?]+)' + _TESTS = [{ + 'url': 'http://www.cnet.com/videos/hands-on-with-microsofts-windows-8-1-update/', + 'info_dict': { + 'id': 'R49SYt__yAfmlXR85z4f7gNmCBDcN_00', + 'display_id': 'hands-on-with-microsofts-windows-8-1-update', + 'ext': 'mp4', + 'title': 'Hands-on with Microsoft Windows 8.1 Update', + 'description': 'The new update to the Windows 8 OS brings improved performance for mouse and keyboard users.', + 'uploader_id': 
'6085384d-619e-11e3-b231-14feb5ca9861', + 'uploader': 'Sarah Mitroff', + 'duration': 70, + 'timestamp': 1396479627, + 'upload_date': '20140402', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.cnet.com/videos/whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187/', + 'md5': 'f11d27b2fa18597fbf92444d2a9ed386', + 'info_dict': { + 'id': 'kjOJd_OoVJqbg_ZD8MZCOk8Wekb9QccK', + 'display_id': 'whiny-pothole-tweets-at-local-government-when-hit-by-cars-tomorrow-daily-187', + 'ext': 'mp4', + 'title': 'Whiny potholes tweet at local government when hit by cars (Tomorrow Daily 187)', + 'description': 'md5:d2b9a95a5ffe978ae6fbd4cf944d618f', + 'uploader_id': 'b163284d-6b73-44fc-b3e6-3da66c392d40', + 'uploader': 'Ashley Esqueda', + 'duration': 1482, + 'timestamp': 1433289889, + 'upload_date': '20150603', + }, + }, { + 'url': 'http://www.zdnet.com/video/share/video-keeping-android-smartphones-and-tablets-secure/', + 'info_dict': { + 'id': 'k0r4T_ehht4xW_hAOqiVQPuBDPZ8SRjt', + 'display_id': 'video-keeping-android-smartphones-and-tablets-secure', + 'ext': 'mp4', + 'title': 'Video: Keeping Android smartphones and tablets secure', + 'description': 'Here\'s the best way to keep Android devices secure, and what you do when they\'ve come to the end of their lives.', + 'uploader_id': 'f2d97ea2-8175-11e2-9d12-0018fe8a00b0', + 'uploader': 'Adrian Kingsley-Hughes', + 'duration': 731, + 'timestamp': 1449129925, + 'upload_date': '20151203', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.zdnet.com/video/huawei-matebook-x-video/', + 'only_matching': True, + }] + + MPX_ACCOUNTS = { + 'cnet': 2198311517, + 'zdnet': 2387448114, + } + + def _real_extract(self, url): + site, display_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, display_id) + + data_json = self._html_search_regex( + r"data(?:-(?:cnet|zdnet))?-video(?:-(?:uvp(?:js)?|player))?-options='([^']+)'", + webpage, 'data json') + data = self._parse_json(data_json, display_id) + vdata = data.get('video') or (data.get('videos') or data.get('playlist'))[0] + + video_id = vdata['mpxRefId'] + + title = vdata['title'] + author = vdata.get('author') + if author: + uploader = '%s %s' % (author['firstName'], author['lastName']) + uploader_id = author.get('id') + else: + uploader = None + uploader_id = None + + info = self._extract_video_info(video_id, site, self.MPX_ACCOUNTS[site]) + info.update({ + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'duration': int_or_none(vdata.get('duration')), + 'uploader': uploader, + 'uploader_id': uploader_id, + }) + return info diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py new file mode 100644 index 000000000..90852a9ef --- /dev/null +++ b/youtube_dl/extractor/cbslocal.py @@ -0,0 +1,104 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .anvato import AnvatoIE +from .sendtonews import SendtoNewsIE +from ..compat import compat_urlparse +from ..utils import ( + parse_iso8601, + unified_timestamp, +) + + +class CBSLocalIE(AnvatoIE): + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P[0-9a-z-]+)' + + _TESTS = [{ + # Anvato backend + 'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis', + 'md5': 'f0ee3081e3843f575fccef901199b212', + 'info_dict': { + 'id': '3401037', + 'ext': 'mp4', + 'title': 'Safety Advocates Say Fatal Car Seat Failures 
Are \'Public Health Crisis\'', + 'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.', + 'thumbnail': 're:^https?://.*', + 'timestamp': 1463440500, + 'upload_date': '20160516', + 'uploader': 'CBS', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\KCBSTV', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\AOL', + 'Syndication\\Yahoo', + 'Syndication\\Tribune', + 'Syndication\\Curb.tv', + 'Content\\News' + ], + 'tags': ['CBS 2 News Evening'], + }, + }, { + # SendtoNews embed + 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', + 'info_dict': { + 'id': 'GxfCe0Zo7D-175909-5588', + }, + 'playlist_count': 9, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', + 'info_dict': { + 'id': '3580809', + 'ext': 'mp4', + 'title': 'A Very Blue Anniversary', + 'description': 'CBS2’s Cindy Hsu has more.', + 'thumbnail': 're:^https?://.*', + 'timestamp': int, + 'upload_date': r're:^\d{8}$', + 'uploader': 'CBS', + 'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\WCBSTV', + 'Syndication\\AOL', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\Yahoo', + 'Content\\News', + 'Content\\News\\Local News', + ], + 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + sendtonews_url = SendtoNewsIE._extract_url(webpage) + if sendtonews_url: + return self.url_result( + compat_urlparse.urljoin(url, sendtonews_url), + ie=SendtoNewsIE.ie_key()) + + info_dict = self._extract_anvato_videos(webpage, display_id) + + timestamp = unified_timestamp(self._html_search_regex( + r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage, + 'released date', default=None)) or parse_iso8601( + self._html_search_meta('uploadDate', webpage)) + + info_dict.update({ + 'display_id': display_id, + 'timestamp': timestamp, + }) + + return info_dict diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py new file mode 100644 index 000000000..345debcf0 --- /dev/null +++ b/youtube_dl/extractor/cbsnews.py @@ -0,0 +1,147 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import zlib + +from .common import InfoExtractor +from .cbs import CBSIE +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, +) +from ..utils import ( + parse_duration, +) + + +class CBSNewsEmbedIE(CBSIE): + IE_NAME = 'cbsnews:embed' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P.+)' + _TESTS = [{ + 'url': 
'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A', + 'only_matching': True, + }] + + def _real_extract(self, url): + item = self._parse_json(zlib.decompress(compat_b64decode( + compat_urllib_parse_unquote(self._match_id(url))), + -zlib.MAX_WBITS), None)['video']['items'][0] + return self._extract_video_info(item['mpxRefId'], 'cbsnews') + + +class CBSNewsIE(CBSIE): + IE_NAME = 'cbsnews' + IE_DESC = 'CBS News' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P[\da-z_-]+)' + + _TESTS = [ + { + # 60 minutes + 'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/', + 'info_dict': { + 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4', + 'ext': 'flv', + 'title': 'Artificial Intelligence, real-life applications', + 'description': 'md5:a7aaf27f1b4777244de8b0b442289304', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 317, + 'uploader': 'CBSI-NEW', + 'timestamp': 1476046464, + 'upload_date': '20161009', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, + { + 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', + 'info_dict': { + 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y', + 'ext': 'mp4', + 'title': 'Fort Hood shooting: Army downplays mental illness as cause of 
attack', + 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', + 'upload_date': '20140404', + 'timestamp': 1396650660, + 'uploader': 'CBSI-NEW', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 205, + 'subtitles': { + 'en': [{ + 'ext': 'ttml', + }], + }, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + # 48 hours + 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/', + 'info_dict': { + 'title': 'Cold as Ice', + 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?', + }, + 'playlist_mincount': 7, + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [] + for embed_url in re.findall(r']+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage): + entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key())) + if entries: + return self.playlist_result( + entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage), + playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)) + + item = self._parse_json(self._html_search_regex( + r'CBSNEWS\.defaultPayload\s*=\s*({.+})', + webpage, 'video JSON info'), display_id)['items'][0] + return self._extract_video_info(item['mpxRefId'], 'cbsnews') + + +class CBSNewsLiveVideoIE(InfoExtractor): + IE_NAME = 'cbsnews:livevideo' + IE_DESC = 'CBS News Live Videos' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/live/video/(?P[^/?#]+)' + + # Live videos get deleted soon. See http://www.cbsnews.com/live/ for the latest examples + _TEST = { + 'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/', + 'info_dict': { + 'id': 'clinton-sanders-prepare-to-face-off-in-nh', + 'ext': 'mp4', + 'title': 'Clinton, Sanders Prepare To Face Off In NH', + 'duration': 334, + }, + 'skip': 'Video gone', + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + video_info = self._download_json( + 'http://feeds.cbsn.cbsnews.com/rundown/story', display_id, query={ + 'device': 'desktop', + 'dvr_slug': display_id, + }) + + formats = self._extract_akamai_formats(video_info['url'], display_id) + self._sort_formats(formats) + + return { + 'id': display_id, + 'display_id': display_id, + 'title': video_info['headline'], + 'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'), + 'duration': parse_duration(video_info.get('segmentDur')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py new file mode 100644 index 000000000..83b764762 --- /dev/null +++ b/youtube_dl/extractor/cbssports.py @@ -0,0 +1,38 @@ +from __future__ import unicode_literals + +from .cbs import CBSBaseIE + + +class CBSSportsIE(CBSBaseIE): + _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P[^/?#&]+)' + + _TESTS = [{ + 'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/', + 'info_dict': { + 'id': '1214315075735', + 'ext': 'mp4', + 'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder', + 'description': 'md5:df6f48622612c2d6bd2e295ddef58def', + 'timestamp': 1524111457, + 'upload_date': '20180419', + 'uploader': 'CBSI-NEW', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 
'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/', + 'only_matching': True, + }] + + def _extract_video_info(self, filter_query, video_id): + return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + [r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'], + webpage, 'video id') + return self._extract_video_info('byId=%s' % video_id, video_id) diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py new file mode 100644 index 000000000..36e6dff72 --- /dev/null +++ b/youtube_dl/extractor/ccc.py @@ -0,0 +1,111 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + try_get, + url_or_none, +) + + +class CCCIE(InfoExtractor): + IE_NAME = 'media.ccc.de' + _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/v/(?P[^/?#&]+)' + + _TESTS = [{ + 'url': 'https://media.ccc.de/v/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor#video', + 'md5': '3a1eda8f3a29515d27f5adb967d7e740', + 'info_dict': { + 'id': '1839', + 'ext': 'mp4', + 'title': 'Introduction to Processor Design', + 'creator': 'byterazor', + 'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20131228', + 'timestamp': 1388188800, + 'duration': 3710, + 'tags': list, + } + }, { + 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + event_id = self._search_regex(r"data-id='(\d+)'", webpage, 'event id') + event_data = self._download_json('https://media.ccc.de/public/events/%s' % event_id, event_id) + + formats = [] + for recording in event_data.get('recordings', []): + recording_url = recording.get('recording_url') + if not recording_url: + continue + language = recording.get('language') + folder = recording.get('folder') + format_id = None + if language: + format_id = language + if folder: + if language: + format_id += '-' + folder + else: + format_id = folder + vcodec = 'h264' if 'h264' in folder else ( + 'none' if folder in ('mp3', 'opus') else None + ) + formats.append({ + 'format_id': format_id, + 'url': recording_url, + 'width': int_or_none(recording.get('width')), + 'height': int_or_none(recording.get('height')), + 'filesize': int_or_none(recording.get('size'), invscale=1024 * 1024), + 'language': language, + 'vcodec': vcodec, + }) + self._sort_formats(formats) + + return { + 'id': event_id, + 'display_id': display_id, + 'title': event_data['title'], + 'creator': try_get(event_data, lambda x: ', '.join(x['persons'])), + 'description': event_data.get('description'), + 'thumbnail': event_data.get('thumb_url'), + 'timestamp': parse_iso8601(event_data.get('date')), + 'duration': int_or_none(event_data.get('length')), + 'tags': event_data.get('tags'), + 'formats': formats, + } + + +class CCCPlaylistIE(InfoExtractor): + IE_NAME = 'media.ccc.de:lists' + _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://media.ccc.de/c/30c3', + 'info_dict': { + 'title': '30C3', + 'id': '30c3', + }, + 'playlist_count': 135, + }] + + def _real_extract(self, url): + playlist_id = 
self._match_id(url).lower() + + conf = self._download_json( + 'https://media.ccc.de/public/conferences/' + playlist_id, + playlist_id) + + entries = [] + for e in conf['events']: + event_url = url_or_none(e.get('frontend_link')) + if event_url: + entries.append(self.url_result(event_url, ie=CCCIE.ie_key())) + + return self.playlist_result(entries, playlist_id, conf.get('title')) diff --git a/youtube_dl/extractor/ccma.py b/youtube_dl/extractor/ccma.py new file mode 100644 index 000000000..544647f92 --- /dev/null +++ b/youtube_dl/extractor/ccma.py @@ -0,0 +1,109 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + parse_duration, + parse_iso8601, + parse_resolution, + url_or_none, +) + + +class CCMAIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?Pvideo|audio)/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/', + 'md5': '7296ca43977c8ea4469e719c609b0871', + 'info_dict': { + 'id': '5630208', + 'ext': 'mp4', + 'title': 'L\'espot de La Marató de TV3', + 'description': 'md5:f12987f320e2f6e988e9908e4fe97765', + 'timestamp': 1470918540, + 'upload_date': '20160811', + } + }, { + 'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/', + 'md5': 'fa3e38f269329a278271276330261425', + 'info_dict': { + 'id': '943685', + 'ext': 'mp3', + 'title': 'El Consell de Savis analitza el derbi', + 'description': 'md5:e2a3648145f3241cb9c6b4b624033e53', + 'upload_date': '20171205', + 'timestamp': 1512507300, + } + }] + + def _real_extract(self, url): + media_type, media_id = re.match(self._VALID_URL, url).groups() + + media = self._download_json( + 'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={ + 'media': media_type, + 'idint': media_id, + }) + + formats = [] + media_url = media['media']['url'] + if isinstance(media_url, list): + for format_ in media_url: + format_url = url_or_none(format_.get('file')) + if not format_url: + continue + label = format_.get('label') + f = parse_resolution(label) + f.update({ + 'url': format_url, + 'format_id': label, + }) + formats.append(f) + else: + formats.append({ + 'url': media_url, + 'vcodec': 'none' if media_type == 'audio' else None, + }) + self._sort_formats(formats) + + informacio = media['informacio'] + title = informacio['titol'] + durada = informacio.get('durada', {}) + duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text')) + timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc')) + + subtitles = {} + subtitols = media.get('subtitols', {}) + if subtitols: + sub_url = subtitols.get('url') + if sub_url: + subtitles.setdefault( + subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({ + 'url': sub_url, + }) + + thumbnails = [] + imatges = media.get('imatges', {}) + if imatges: + thumbnail_url = imatges.get('url') + if thumbnail_url: + thumbnails = [{ + 'url': thumbnail_url, + 'width': int_or_none(imatges.get('amplada')), + 'height': int_or_none(imatges.get('alcada')), + }] + + return { + 'id': media_id, + 'title': title, + 'description': clean_html(informacio.get('descripcio')), + 'duration': duration, + 'timestamp': timestamp, + 'thumbnails': thumbnails, + 'subtitles': subtitles, + 'formats': formats, + } diff --git a/youtube_dl/extractor/cctv.py b/youtube_dl/extractor/cctv.py new file mode 100644 index 
000000000..c76f361c6 --- /dev/null +++ b/youtube_dl/extractor/cctv.py @@ -0,0 +1,191 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + float_or_none, + try_get, + unified_timestamp, +) + + +class CCTVIE(InfoExtractor): + IE_DESC = '央视网' + _VALID_URL = r'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)' + _TESTS = [{ + # fo.addVariable("videoCenterId","id") + 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml', + 'md5': 'd61ec00a493e09da810bf406a078f691', + 'info_dict': { + 'id': '5ecdbeab623f4973b40ff25f18b174e8', + 'ext': 'mp4', + 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)', + 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95', + 'duration': 98, + 'uploader': 'songjunjie', + 'timestamp': 1455279956, + 'upload_date': '20160212', + }, + }, { + # var guid = "id" + 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml', + 'info_dict': { + 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae', + 'ext': 'mp4', + 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)', + 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。', + 'duration': 37, + 'uploader': 'shujun', + 'timestamp': 1454677291, + 'upload_date': '20160205', + }, + 'params': { + 'skip_download': True, + }, + }, { + # changePlayer('id') + 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml', + 'info_dict': { + 'id': '4bb9bb4db7a6471ba85fdeda5af0381e', + 'ext': 'mp4', + 'title': 'NHnews008 ANNUAL POLITICAL SEASON', + 'description': 'Four Comprehensives', + 'duration': 60, + 'uploader': 'zhangyunlei', + 'timestamp': 1425385521, + 'upload_date': '20150303', + }, + 'params': { + 'skip_download': True, + }, + }, { + # loadvideo('id') + 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml', + 'info_dict': { + 'id': 'b15f009ff45c43968b9af583fc2e04b2', + 'ext': 'mp4', + 'title': 'Путь,усыпанный космеями Серия 1', + 'description': 'Путь, усыпанный космеями', + 'duration': 2645, + 'uploader': 'renxue', + 'timestamp': 1477479241, + 'upload_date': '20161026', + }, + 'params': { + 'skip_download': True, + }, + }, { + # var initMyAray = 'id' + 'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml', + 'info_dict': { + 'id': 'a194cfa7f18c426b823d876668325946', + 'ext': 'mp4', + 'title': '小泽征尔音乐塾 音乐梦想无国界', + 'duration': 2173, + 'timestamp': 1369248264, + 'upload_date': '20130522', + }, + 'params': { + 'skip_download': True, + }, + }, { + # var ids = ["id"] + 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml', + 'info_dict': { + 'id': 'a8606119a4884588a79d81c02abecc16', + 'ext': 'mp3', + 'title': '来自维也纳的新年贺礼', + 'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7', + 'duration': 1578, + 'uploader': 'djy', + 'timestamp': 1482942419, + 'upload_date': '20161228', + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml', + 'only_matching': True, + }, { + 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44', + 'only_matching': True, + }, { + 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml', + 'only_matching': True, + }, { + 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml', + 'only_matching': True, + }, { + 'url': 
'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_id = self._search_regex( + [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)', + r'videoCenterId["\']\s*,\s*["\']([\da-fA-F]+)', + r'changePlayer\s*\(\s*["\']([\da-fA-F]+)', + r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)', + r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)', + r'var\s+ids\s*=\s*\[["\']([\da-fA-F]+)'], + webpage, 'video id') + + data = self._download_json( + 'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id, + query={ + 'pid': video_id, + 'url': url, + 'idl': 32, + 'idlr': 32, + 'modifyed': 'false', + }) + + title = data['title'] + + formats = [] + + video = data.get('video') + if isinstance(video, dict): + for quality, chapters_key in enumerate(('lowChapters', 'chapters')): + video_url = try_get( + video, lambda x: x[chapters_key][0]['url'], compat_str) + if video_url: + formats.append({ + 'url': video_url, + 'format_id': 'http', + 'quality': quality, + 'preference': -1, + }) + + hls_url = try_get(data, lambda x: x['hls_url'], compat_str) + if hls_url: + hls_url = re.sub(r'maxbr=\d+&?', '', hls_url) + formats.extend(self._extract_m3u8_formats( + hls_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + + uploader = data.get('editer_name') + description = self._html_search_meta( + 'description', webpage, default=None) + timestamp = unified_timestamp(data.get('f_pgmtime')) + duration = float_or_none(try_get(video, lambda x: x['totalLength'])) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'uploader': uploader, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/cda.py b/youtube_dl/extractor/cda.py new file mode 100644 index 000000000..0c3af23d5 --- /dev/null +++ b/youtube_dl/extractor/cda.py @@ -0,0 +1,182 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import codecs +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + multipart_encode, + parse_duration, + random_birthday, + urljoin, +) + + +class CDAIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P[0-9a-z]+)' + _BASE_URL = 'http://www.cda.pl/' + _TESTS = [{ + 'url': 'http://www.cda.pl/video/5749950c', + 'md5': '6f844bf51b15f31fae165365707ae970', + 'info_dict': { + 'id': '5749950c', + 'ext': 'mp4', + 'height': 720, + 'title': 'Oto dlaczego przed zakrętem należy zwolnić.', + 'description': 'md5:269ccd135d550da90d1662651fcb9772', + 'thumbnail': r're:^https?://.*\.jpg$', + 'average_rating': float, + 'duration': 39, + 'age_limit': 0, + } + }, { + 'url': 'http://www.cda.pl/video/57413289', + 'md5': 'a88828770a8310fc00be6c95faf7f4d5', + 'info_dict': { + 'id': '57413289', + 'ext': 'mp4', + 'title': 'Lądowanie na lotnisku na Maderze', + 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'crash404', + 'view_count': int, + 'average_rating': float, + 'duration': 137, + 'age_limit': 0, + } + }, { + # Age-restricted + 'url': 'http://www.cda.pl/video/1273454c4', + 'info_dict': { + 'id': '1273454c4', + 'ext': 'mp4', + 'title': 'Bronson (2008) napisy HD 1080p', + 'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c', + 'height': 1080, + 'uploader': 'boniek61', + 'thumbnail': 
r're:^https?://.*\.jpg$', + 'duration': 5554, + 'age_limit': 18, + 'view_count': int, + 'average_rating': float, + }, + }, { + 'url': 'http://ebd.cda.pl/0x0/5749950c', + 'only_matching': True, + }] + + def _download_age_confirm_page(self, url, video_id, *args, **kwargs): + form_data = random_birthday('rok', 'miesiac', 'dzien') + form_data.update({'return': url, 'module': 'video', 'module_id': video_id}) + data, content_type = multipart_encode(form_data) + return self._download_webpage( + urljoin(url, '/a/validatebirth'), video_id, *args, + data=data, headers={ + 'Referer': url, + 'Content-Type': content_type, + }, **kwargs) + + def _real_extract(self, url): + video_id = self._match_id(url) + self._set_cookie('cda.pl', 'cda.player', 'html5') + webpage = self._download_webpage( + self._BASE_URL + '/video/' + video_id, video_id) + + if 'Ten film jest dostępny dla użytkowników premium' in webpage: + raise ExtractorError('This video is only available for premium users.', expected=True) + + need_confirm_age = False + if self._html_search_regex(r'(]+action="/a/validatebirth")', + webpage, 'birthday validate form', default=None): + webpage = self._download_age_confirm_page( + url, video_id, note='Confirming age') + need_confirm_age = True + + formats = [] + + uploader = self._search_regex(r'''(?x) + <(span|meta)[^>]+itemprop=(["\'])author\2[^>]*> + (?:<\1[^>]*>[^<]*|(?!)(?:.|\n))*? + <(span|meta)[^>]+itemprop=(["\'])name\4[^>]*>(?P[^<]+) + ''', webpage, 'uploader', default=None, group='uploader') + view_count = self._search_regex( + r'Odsłony:(?:\s| )*([0-9]+)', webpage, + 'view_count', default=None) + average_rating = self._search_regex( + r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P[0-9.]+)', + webpage, 'rating', fatal=False, group='rating_value') + + info_dict = { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'uploader': uploader, + 'view_count': int_or_none(view_count), + 'average_rating': float_or_none(average_rating), + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + 'duration': None, + 'age_limit': 18 if need_confirm_age else 0, + } + + def extract_format(page, version): + json_str = self._html_search_regex( + r'player_data=(\\?["\'])(?P.+?)\1', page, + '%s player_json' % version, fatal=False, group='player_data') + if not json_str: + return + player_data = self._parse_json( + json_str, '%s player_data' % version, fatal=False) + if not player_data: + return + video = player_data.get('video') + if not video or 'file' not in video: + self.report_warning('Unable to extract %s version information' % version) + return + if video['file'].startswith('uggc'): + video['file'] = codecs.decode(video['file'], 'rot_13') + if video['file'].endswith('adc.mp4'): + video['file'] = video['file'].replace('adc.mp4', '.mp4') + f = { + 'url': video['file'], + } + m = re.search( + r']+data-quality="(?P[^"]+)"[^>]+href="[^"]+"[^>]+class="[^"]*quality-btn-active[^"]*">(?P[0-9]+)p', + page) + if m: + f.update({ + 'format_id': m.group('format_id'), + 'height': int(m.group('height')), + }) + info_dict['formats'].append(f) + if not info_dict['duration']: + info_dict['duration'] = parse_duration(video.get('duration')) + + extract_format(webpage, 'default') + + for href, resolution in re.findall( + r']+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)', + webpage): + if need_confirm_age: + handler = self._download_age_confirm_page + else: + handler = self._download_webpage + + 
webpage = handler( + self._BASE_URL + href, video_id, + 'Downloading %s version information' % resolution, fatal=False) + if not webpage: + # Manually report warning because empty page is returned when + # invalid version is requested. + self.report_warning('Unable to download %s version information' % resolution) + continue + + extract_format(webpage, resolution) + + self._sort_formats(formats) + + return info_dict diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py new file mode 100644 index 000000000..1ec58f7d8 --- /dev/null +++ b/youtube_dl/extractor/ceskatelevize.py @@ -0,0 +1,287 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_unquote, + compat_urllib_parse_urlparse, +) +from ..utils import ( + ExtractorError, + float_or_none, + sanitized_Request, + unescapeHTML, + update_url_query, + urlencode_postdata, + USER_AGENTS, +) + + +class CeskaTelevizeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P[^/#?]+)' + _TESTS = [{ + 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', + 'info_dict': { + 'id': '61924494877246241', + 'ext': 'mp4', + 'title': 'Hyde Park Civilizace: Život v Grónsku', + 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 3350, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', + 'info_dict': { + 'id': '61924494877028507', + 'ext': 'mp4', + 'title': 'Hyde Park Civilizace: Bonus 01 - En', + 'description': 'English Subtittles', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 81.3, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # live stream + 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'info_dict': { + 'id': 402, + 'ext': 'mp4', + 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'is_live': True, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'Georestricted to Czech Republic', + }, { + 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' + if '%s
</p>
    ' % NOT_AVAILABLE_STRING in webpage: + raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + + type_ = None + episode_id = None + + playlist = self._parse_json( + self._search_regex( + r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist', + default='{}'), playlist_id) + if playlist: + type_ = playlist.get('type') + episode_id = playlist.get('id') + + if not type_: + type_ = self._html_search_regex( + r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],', + webpage, 'type') + if not episode_id: + episode_id = self._html_search_regex( + r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],', + webpage, 'episode_id') + + data = { + 'playlist[0][type]': type_, + 'playlist[0][id]': episode_id, + 'requestUrl': compat_urllib_parse_urlparse(url).path, + 'requestSource': 'iVysilani', + } + + entries = [] + + for user_agent in (None, USER_AGENTS['Safari']): + req = sanitized_Request( + 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + data=urlencode_postdata(data)) + + req.add_header('Content-type', 'application/x-www-form-urlencoded') + req.add_header('x-addr', '127.0.0.1') + req.add_header('X-Requested-With', 'XMLHttpRequest') + if user_agent: + req.add_header('User-Agent', user_agent) + req.add_header('Referer', url) + + playlistpage = self._download_json(req, playlist_id, fatal=False) + + if not playlistpage: + continue + + playlist_url = playlistpage['url'] + if playlist_url == 'error_region': + raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + + req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) + req.add_header('Referer', url) + + playlist_title = self._og_search_title(webpage, default=None) + playlist_description = self._og_search_description(webpage, default=None) + + playlist = self._download_json(req, playlist_id, fatal=False) + if not playlist: + continue + + playlist = playlist.get('playlist') + if not isinstance(playlist, list): + continue + + playlist_len = len(playlist) + + for num, item in enumerate(playlist): + is_live = item.get('type') == 'LIVE' + formats = [] + for format_id, stream_url in item.get('streamUrls', {}).items(): + if 'playerType=flash' in stream_url: + stream_formats = self._extract_m3u8_formats( + stream_url, playlist_id, 'mp4', 'm3u8_native', + m3u8_id='hls-%s' % format_id, fatal=False) + else: + stream_formats = self._extract_mpd_formats( + stream_url, playlist_id, + mpd_id='dash-%s' % format_id, fatal=False) + # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031 + if format_id == 'audioDescription': + for f in stream_formats: + f['source_preference'] = -10 + formats.extend(stream_formats) + + if user_agent and len(entries) == playlist_len: + entries[num]['formats'].extend(formats) + continue + + item_id = item.get('id') or item['assetId'] + title = item['title'] + + duration = float_or_none(item.get('duration')) + thumbnail = item.get('previewImageUrl') + + subtitles = {} + if item.get('type') == 'VOD': + subs = item.get('subtitles') + if subs: + subtitles = self.extract_subtitles(episode_id, subs) + + if playlist_len == 1: + final_title = playlist_title or title + if is_live: + final_title = self._live_title(final_title) + else: + final_title = '%s (%s)' % (playlist_title, title) + + entries.append({ + 'id': item_id, + 'title': final_title, + 'description': playlist_description if playlist_len == 1 else None, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + }) + + for e in entries: + 
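As an aside, a self-contained sketch of the millisecond-to-SRT timecode conversion that _fix_subtitles() relies on below (mirroring _msectotimecode: the divider chain peels off milliseconds, seconds, minutes, then hours):

def msec_to_timecode(msec):
    components = []
    for divider in (1000, 60, 60, 100):
        components.append(msec % divider)
        msec //= divider
    # components is [msec, sec, min, hour]; format as HH:MM:SS,mmm
    return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

assert msec_to_timecode(3661123) == '01:01:01,123'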
self._sort_formats(e['formats']) + + return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) + + def _get_subtitles(self, episode_id, subs): + original_subtitles = self._download_webpage( + subs[0]['url'], episode_id, 'Downloading subtitles') + srt_subs = self._fix_subtitles(original_subtitles) + return { + 'cs': [{ + 'ext': 'srt', + 'data': srt_subs, + }] + } + + @staticmethod + def _fix_subtitles(subtitles): + """ Convert millisecond-based subtitles to SRT """ + + def _msectotimecode(msec): + """ Helper utility to convert milliseconds to timecode """ + components = [] + for divider in [1000, 60, 60, 100]: + components.append(msec % divider) + msec //= divider + return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components) + + def _fix_subtitle(subtitle): + for line in subtitle.splitlines(): + m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line) + if m: + yield m.group(1) + start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:]) + yield '{0} --> {1}'.format(start, stop) + else: + yield line + + return '\r\n'.join(_fix_subtitle(subtitles)) + + +class CeskaTelevizePoradyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P[^/#?]+)' + _TESTS = [{ + # video with 18+ caution trailer + 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', + 'info_dict': { + 'id': '215562210900007-bogotart', + 'title': 'Queer: Bogotart', + 'description': 'Alternativní průvodce současným queer světem', + }, + 'playlist': [{ + 'info_dict': { + 'id': '61924494876844842', + 'ext': 'mp4', + 'title': 'Queer: Bogotart (Varování 18+)', + 'duration': 10.2, + }, + }, { + 'info_dict': { + 'id': '61924494877068022', + 'ext': 'mp4', + 'title': 'Queer: Bogotart (Queer)', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 1558.3, + }, + }], + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + data_url = update_url_query(unescapeHTML(self._search_regex( + (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), + webpage, 'iframe player url', group='url')), query={ + 'autoStart': 'true', + }) + + return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py new file mode 100644 index 000000000..81108e704 --- /dev/null +++ b/youtube_dl/extractor/channel9.py @@ -0,0 +1,262 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + parse_iso8601, + qualities, + unescapeHTML, +) + + +class Channel9IE(InfoExtractor): + IE_DESC = 'Channel 9' + IE_NAME = 'channel9' + _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P.+?)(?P/RSS)?/?(?:[?#&]|$)' + + _TESTS = [{ + 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002', + 'md5': '32083d4eaf1946db6d454313f44510ca', + 'info_dict': { + 'id': '6c413323-383a-49dc-88f9-a22800cab024', + 'ext': 'wmv', + 'title': 'Developer Kick-Off Session: Stuff We Love', + 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731', + 'duration': 4576, + 'thumbnail': r're:https?://.*\.jpg', + 'timestamp': 
1377717420, + 'upload_date': '20130828', + 'session_code': 'KOS002', + 'session_room': 'Arena 1A', + 'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'], + }, + }, { + 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing', + 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc', + 'info_dict': { + 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024', + 'ext': 'wmv', + 'title': 'Self-service BI with Power BI - nuclear testing', + 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54', + 'duration': 1540, + 'thumbnail': r're:https?://.*\.jpg', + 'timestamp': 1386381991, + 'upload_date': '20131207', + 'authors': ['Mike Wilmot'], + }, + }, { + # low quality mp4 is best + 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library', + 'info_dict': { + 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76', + 'ext': 'mp4', + 'title': 'Ranges for the Standard Library', + 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372', + 'duration': 5646, + 'thumbnail': r're:https?://.*\.jpg', + 'upload_date': '20150930', + 'timestamp': 1443640735, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS', + 'info_dict': { + 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b', + 'title': 'Channel 9', + }, + 'playlist_mincount': 100, + }, { + 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS', + 'only_matching': True, + }, { + 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman', + 'only_matching': True, + }] + + _RSS_URL = 'http://channel9.msdn.com/%s/RSS' + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b', + webpage) + + def _extract_list(self, video_id, rss_url=None): + if not rss_url: + rss_url = self._RSS_URL % video_id + rss = self._download_xml(rss_url, video_id, 'Downloading RSS') + entries = [self.url_result(session_url.text, 'Channel9') + for session_url in rss.findall('./channel/item/link')] + title_text = rss.find('./channel/title').text + return self.playlist_result(entries, video_id, title_text) + + def _real_extract(self, url): + content_path, rss = re.match(self._VALID_URL, url).groups() + + if rss: + return self._extract_list(content_path, url) + + webpage = self._download_webpage( + url, content_path, 'Downloading web page') + + episode_data = self._search_regex( + r"data-episode='([^']+)'", webpage, 'episode data', default=None) + if episode_data: + episode_data = self._parse_json(unescapeHTML( + episode_data), content_path) + content_id = episode_data['contentId'] + is_session = '/Sessions(' in episode_data['api'] + content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + if is_session: + content_url += '?$expand=Speakers' + else: + content_url += '?$expand=Authors' + content_data = self._download_json(content_url, content_id) + title = content_data['Title'] + + QUALITIES = ( + 'mp3', + 'wmv', 'mp4', + 'wmv-low', 'mp4-low', + 'wmv-mid', 'mp4-mid', + 'wmv-high', 'mp4-high', + ) + + quality_key = qualities(QUALITIES) + + def quality(quality_id, format_url): + return (len(QUALITIES) if '_Source.' 
in format_url + else quality_key(quality_id)) + + formats = [] + urls = set() + + SITE_QUALITIES = { + 'MP3': 'mp3', + 'MP4': 'mp4', + 'Low Quality WMV': 'wmv-low', + 'Low Quality MP4': 'mp4-low', + 'Mid Quality WMV': 'wmv-mid', + 'Mid Quality MP4': 'mp4-mid', + 'High Quality WMV': 'wmv-high', + 'High Quality MP4': 'mp4-high', + } + + formats_select = self._search_regex( + r'(?s)]+name=["\']format[^>]+>(.+?)]+\bvalue=(["\'])(?P(?:(?!\1).)+)\1[^>]*>\s*(?P[^<]+?)\s*<', + formats_select): + format_url = mobj.group('url') + if format_url in urls: + continue + urls.add(format_url) + format_id = mobj.group('format') + quality_id = SITE_QUALITIES.get(format_id, format_id) + formats.append({ + 'url': format_url, + 'format_id': quality_id, + 'quality': quality(quality_id, format_url), + 'vcodec': 'none' if quality_id == 'mp3' else None, + }) + + API_QUALITIES = { + 'VideoMP4Low': 'mp4-low', + 'VideoWMV': 'wmv-mid', + 'VideoMP4Medium': 'mp4-mid', + 'VideoMP4High': 'mp4-high', + 'VideoWMVHQ': 'wmv-hq', + } + + for format_id, q in API_QUALITIES.items(): + q_url = content_data.get(format_id) + if not q_url or q_url in urls: + continue + urls.add(q_url) + formats.append({ + 'url': q_url, + 'format_id': q, + 'quality': quality(q, q_url), + }) + + self._sort_formats(formats) + + slides = content_data.get('Slides') + zip_file = content_data.get('ZipFile') + + if not formats and not slides and not zip_file: + raise ExtractorError( + 'None of recording, slides or zip are available for %s' % content_path) + + subtitles = {} + for caption in content_data.get('Captions', []): + caption_url = caption.get('Url') + if not caption_url: + continue + subtitles.setdefault(caption.get('Language', 'en'), []).append({ + 'url': caption_url, + 'ext': 'vtt', + }) + + common = { + 'id': content_id, + 'title': title, + 'description': clean_html(content_data.get('Description') or content_data.get('Body')), + 'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'), + 'duration': int_or_none(content_data.get('MediaLengthInSeconds')), + 'timestamp': parse_iso8601(content_data.get('PublishedDate')), + 'avg_rating': int_or_none(content_data.get('Rating')), + 'rating_count': int_or_none(content_data.get('RatingCount')), + 'view_count': int_or_none(content_data.get('Views')), + 'comment_count': int_or_none(content_data.get('CommentCount')), + 'subtitles': subtitles, + } + if is_session: + speakers = [] + for s in content_data.get('Speakers', []): + speaker_name = s.get('FullName') + if not speaker_name: + continue + speakers.append(speaker_name) + + common.update({ + 'session_code': content_data.get('Code'), + 'session_room': content_data.get('Room'), + 'session_speakers': speakers, + }) + else: + authors = [] + for a in content_data.get('Authors', []): + author_name = a.get('DisplayName') + if not author_name: + continue + authors.append(author_name) + common['authors'] = authors + + contents = [] + + if slides: + d = common.copy() + d.update({'title': title + '-Slides', 'url': slides}) + contents.append(d) + + if zip_file: + d = common.copy() + d.update({'title': title + '-Zip', 'url': zip_file}) + contents.append(d) + + if formats: + d = common.copy() + d.update({'title': title, 'formats': formats}) + contents.append(d) + return self.playlist_result(contents) + else: + return self._extract_list(content_path) diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dl/extractor/charlierose.py new file mode 100644 index 000000000..42c9af263 --- /dev/null +++ b/youtube_dl/extractor/charlierose.py 
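A hedged sketch of the ranking rule Channel9IE applies above: any '_Source.' download outranks every listed rendition, and everything else ranks by its position in QUALITIES (abbreviated here for illustration):

QUALITIES = ('mp3', 'wmv', 'mp4', 'wmv-high', 'mp4-high')

def quality_key(quality_id):
    # unknown ids sort below everything
    return QUALITIES.index(quality_id) if quality_id in QUALITIES else -1

def quality(quality_id, format_url):
    # source files beat any transcoded rendition
    return len(QUALITIES) if '_Source.' in format_url else quality_key(quality_id)

assert quality('mp4', 'Ranges_Source.mp4') > quality('mp4-high', 'Ranges_high.mp4')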
@@ -0,0 +1,54 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import remove_end + + +class CharlieRoseIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?charlierose\.com/(?:video|episode)(?:s|/player)/(?P\d+)' + _TESTS = [{ + 'url': 'https://charlierose.com/videos/27996', + 'md5': 'fda41d49e67d4ce7c2411fd2c4702e09', + 'info_dict': { + 'id': '27996', + 'ext': 'mp4', + 'title': 'Remembering Zaha Hadid', + 'thumbnail': r're:^https?://.*\.jpg\?\d+', + 'description': 'We revisit past conversations with Zaha Hadid, in memory of the world renowned Iraqi architect.', + 'subtitles': { + 'en': [{ + 'ext': 'vtt', + }], + }, + }, + }, { + 'url': 'https://charlierose.com/videos/27996', + 'only_matching': True, + }, { + 'url': 'https://charlierose.com/episodes/30887?autoplay=true', + 'only_matching': True, + }] + + _PLAYER_BASE = 'https://charlierose.com/video/player/%s' + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(self._PLAYER_BASE % video_id, video_id) + + title = remove_end(self._og_search_title(webpage), ' - Charlie Rose') + + info_dict = self._parse_html5_media_entries( + self._PLAYER_BASE % video_id, webpage, video_id, + m3u8_entry_protocol='m3u8_native')[0] + + self._sort_formats(info_dict['formats']) + self._remove_duplicate_formats(info_dict['formats']) + + info_dict.update({ + 'id': video_id, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': self._og_search_description(webpage), + }) + + return info_dict diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py new file mode 100644 index 000000000..e2b828d8a --- /dev/null +++ b/youtube_dl/extractor/chaturbate.py @@ -0,0 +1,77 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class ChaturbateIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?P[^/?#]+)' + _TESTS = [{ + 'url': 'https://www.chaturbate.com/siswet19/', + 'info_dict': { + 'id': 'siswet19', + 'ext': 'mp4', + 'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'age_limit': 18, + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Room is offline', + }, { + 'url': 'https://en.chaturbate.com/siswet19/', + 'only_matching': True, + }] + + _ROOM_OFFLINE = 'Room is currently offline' + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + url, video_id, headers=self.geo_verification_headers()) + + m3u8_urls = [] + + for m in re.finditer( + r'(["\'])(?Phttp.+?\.m3u8.*?)\1', webpage): + m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group( + 'url').replace('_fast', '') + for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url): + if m3u8_url not in m3u8_urls: + m3u8_urls.append(m3u8_url) + + if not m3u8_urls: + error = self._search_regex( + [r']+class=(["\'])desc_span\1[^>]*>(?P[^<]+)', + r']+id=(["\'])defchat\1[^>]*>\s*
<p><strong>
    (?P[^<]+)<'], + webpage, 'error', group='error', default=None) + if not error: + if any(p in webpage for p in ( + self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')): + error = self._ROOM_OFFLINE + if error: + raise ExtractorError(error, expected=True) + raise ExtractorError('Unable to find stream URL') + + formats = [] + for m3u8_url in m3u8_urls: + m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow' + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, ext='mp4', + # ffmpeg skips segments for fast m3u8 + preference=-10 if m3u8_id == 'fast' else None, + m3u8_id=m3u8_id, fatal=False, live=True)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._live_title(video_id), + 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, + 'age_limit': self._rta_search(webpage), + 'is_live': True, + 'formats': formats, + } diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dl/extractor/chilloutzone.py new file mode 100644 index 000000000..5aac21299 --- /dev/null +++ b/youtube_dl/extractor/chilloutzone.py @@ -0,0 +1,96 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from .youtube import YoutubeIE +from ..compat import compat_b64decode +from ..utils import ( + clean_html, + ExtractorError +) + + +class ChilloutzoneIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?chilloutzone\.net/video/(?P[\w|-]+)\.html' + _TESTS = [{ + 'url': 'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html', + 'md5': 'a76f3457e813ea0037e5244f509e66d1', + 'info_dict': { + 'id': 'enemene-meck-alle-katzen-weg', + 'ext': 'mp4', + 'title': 'Enemene Meck - Alle Katzen weg', + 'description': 'Ist das der Umkehrschluss des Niesenden Panda-Babys?', + }, + }, { + 'note': 'Video hosted at YouTube', + 'url': 'http://www.chilloutzone.net/video/eine-sekunde-bevor.html', + 'info_dict': { + 'id': '1YVQaAgHyRU', + 'ext': 'mp4', + 'title': '16 Photos Taken 1 Second Before Disaster', + 'description': 'md5:58a8fcf6a459fe0a08f54140f0ad1814', + 'uploader': 'BuzzFeedVideo', + 'uploader_id': 'BuzzFeedVideo', + 'upload_date': '20131105', + }, + }, { + 'note': 'Video hosted at Vimeo', + 'url': 'http://www.chilloutzone.net/video/icon-blending.html', + 'md5': '2645c678b8dc4fefcc0e1b60db18dac1', + 'info_dict': { + 'id': '85523671', + 'ext': 'mp4', + 'title': 'The Sunday Times - Icons', + 'description': 're:(?s)^Watch the making of - makingoficons.com.{300,}', + 'uploader': 'Us', + 'uploader_id': 'usfilms', + 'upload_date': '20140131' + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + + base64_video_info = self._html_search_regex( + r'var cozVidData = "(.+?)";', webpage, 'video data') + decoded_video_info = compat_b64decode(base64_video_info).decode('utf-8') + video_info_dict = json.loads(decoded_video_info) + + # get video information from dict + video_url = video_info_dict['mediaUrl'] + description = clean_html(video_info_dict.get('description')) + title = video_info_dict['title'] + native_platform = video_info_dict['nativePlatform'] + native_video_id = video_info_dict['nativeVideoId'] + source_priority = video_info_dict['sourcePriority'] + + # If nativePlatform is None a fallback mechanism is used (i.e. 
youtube embed) + if native_platform is None: + youtube_url = YoutubeIE._extract_url(webpage) + if youtube_url: + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + + # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or + # the own CDN + if source_priority == 'native': + if native_platform == 'youtube': + return self.url_result(native_video_id, ie='Youtube') + if native_platform == 'vimeo': + return self.url_result( + 'http://vimeo.com/' + native_video_id, ie='Vimeo') + + if not video_url: + raise ExtractorError('No video found') + + return { + 'id': video_id, + 'url': video_url, + 'ext': 'mp4', + 'title': title, + 'description': description, + } diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py new file mode 100644 index 000000000..8d75cdf19 --- /dev/null +++ b/youtube_dl/extractor/chirbit.py @@ -0,0 +1,91 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_b64decode +from ..utils import parse_duration + + +class ChirbitIE(InfoExtractor): + IE_NAME = 'chirbit' + _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P[\da-zA-Z]+)' + _TESTS = [{ + 'url': 'http://chirb.it/be2abG', + 'info_dict': { + 'id': 'be2abG', + 'ext': 'mp3', + 'title': 'md5:f542ea253f5255240be4da375c6a5d7e', + 'description': 'md5:f24a4e22a71763e32da5fed59e47c770', + 'duration': 306, + 'uploader': 'Gerryaudio', + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', + 'only_matching': True, + }, { + 'url': 'https://chirb.it/wp/MN58c2', + 'only_matching': True, + }] + + def _real_extract(self, url): + audio_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://chirb.it/%s' % audio_id, audio_id) + + data_fd = self._search_regex( + r'data-fd=(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'data fd', group='url') + + # Reverse engineered from https://chirb.it/js/chirbit.player.js (look + # for soundURL) + audio_url = compat_b64decode(data_fd[::-1]).decode('utf-8') + + title = self._search_regex( + r'class=["\']chirbit-title["\'][^>]*>([^<]+)', webpage, 'title') + description = self._search_regex( + r'
<h3>Description</h3>
    \s*]*>([^<]+)', + webpage, 'description', default=None) + duration = parse_duration(self._search_regex( + r'class=["\']c-length["\'][^>]*>([^<]+)', + webpage, 'duration', fatal=False)) + uploader = self._search_regex( + r'id=["\']chirbit-username["\'][^>]*>([^<]+)', + webpage, 'uploader', fatal=False) + + return { + 'id': audio_id, + 'url': audio_url, + 'title': title, + 'description': description, + 'duration': duration, + 'uploader': uploader, + } + + +class ChirbitProfileIE(InfoExtractor): + IE_NAME = 'chirbit:profile' + _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P[^/]+)' + _TEST = { + 'url': 'http://chirbit.com/ScarletBeauty', + 'info_dict': { + 'id': 'ScarletBeauty', + }, + 'playlist_mincount': 3, + } + + def _real_extract(self, url): + profile_id = self._match_id(url) + + webpage = self._download_webpage(url, profile_id) + + entries = [ + self.url_result(self._proto_relative_url('//chirb.it/' + video_id)) + for _, video_id in re.findall(r']+id=([\'"])copy-btn-(?P[0-9a-zA-Z]+)\1', webpage)] + + return self.playlist_result(entries, profile_id) diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dl/extractor/cinchcast.py new file mode 100644 index 000000000..b861d54b0 --- /dev/null +++ b/youtube_dl/extractor/cinchcast.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + unified_strdate, + xpath_text, +) + + +class CinchcastIE(InfoExtractor): + _VALID_URL = r'https?://player\.cinchcast\.com/.*?(?:assetId|show_id)=(?P[0-9]+)' + _TESTS = [{ + 'url': 'http://player.cinchcast.com/?show_id=5258197&platformId=1&assetType=single', + 'info_dict': { + 'id': '5258197', + 'ext': 'mp3', + 'title': 'Train Your Brain to Up Your Game with Coach Mandy', + 'upload_date': '20130816', + }, + }, { + # Actual test is run in generic, look for undergroundwellness + 'url': 'http://player.cinchcast.com/?platformId=1&assetType=single&assetId=7141703', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + doc = self._download_xml( + 'http://www.blogtalkradio.com/playerasset/mrss?assetType=single&assetId=%s' % video_id, + video_id) + + item = doc.find('.//item') + title = xpath_text(item, './title', fatal=True) + date_str = xpath_text( + item, './{http://developer.longtailvideo.com/trac/}date') + upload_date = unified_strdate(date_str, day_first=False) + # duration is present but wrong + formats = [{ + 'format_id': 'main', + 'url': item.find('./{http://search.yahoo.com/mrss/}content').attrib['url'], + }] + backup_url = xpath_text( + item, './{http://developer.longtailvideo.com/trac/}backupContent') + if backup_url: + formats.append({ + 'preference': 2, # seems to be more reliable + 'format_id': 'backup', + 'url': backup_url, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'upload_date': upload_date, + 'formats': formats, + } diff --git a/youtube_dl/extractor/cinemax.py b/youtube_dl/extractor/cinemax.py new file mode 100644 index 000000000..7f89d33de --- /dev/null +++ b/youtube_dl/extractor/cinemax.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .hbo import HBOBaseIE + + +class CinemaxIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P[^/]+/video/[0-9a-z-]+-(?P\d+))' + _TESTS = [{ + 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903', + 'md5': '82e0734bba8aa7ef526c9dd00cf35a05', + 'info_dict': { + 'id': '20126903', + 'ext': 'mp4', + 'title': 
'S1 Ep 1: Recap', + }, + 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], + }, { + 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed', + 'only_matching': True, + }] + + def _real_extract(self, url): + path, video_id = re.match(self._VALID_URL, url).groups() + info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id) + info['id'] = video_id + return info diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dl/extractor/ciscolive.py new file mode 100644 index 000000000..da404e4dc --- /dev/null +++ b/youtube_dl/extractor/ciscolive.py @@ -0,0 +1,151 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) +from ..utils import ( + clean_html, + float_or_none, + int_or_none, + try_get, + urlencode_postdata, +) + + +class CiscoLiveBaseIE(InfoExtractor): + # These appear to be constant across all Cisco Live presentations + # and are not tied to any user session or event + RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s' + RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz' + RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye' + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s' + + HEADERS = { + 'Origin': 'https://ciscolive.cisco.com', + 'rfApiProfileId': RAINFOCUS_API_PROFILE_ID, + 'rfWidgetId': RAINFOCUS_WIDGET_ID, + } + + def _call_api(self, ep, rf_id, query, referrer, note=None): + headers = self.HEADERS.copy() + headers['Referer'] = referrer + return self._download_json( + self.RAINFOCUS_API_URL % ep, rf_id, note=note, + data=urlencode_postdata(query), headers=headers) + + def _parse_rf_item(self, rf_item): + event_name = rf_item.get('eventName') + title = rf_item['title'] + description = clean_html(rf_item.get('abstract')) + presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName']) + bc_id = rf_item['videos'][0]['url'] + bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id + duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length'])) + location = try_get(rf_item, lambda x: x['times'][0]['room']) + + if duration: + duration = duration * 60 + + return { + '_type': 'url_transparent', + 'url': bc_url, + 'ie_key': 'BrightcoveNew', + 'title': title, + 'description': description, + 'duration': duration, + 'creator': presenter_name, + 'location': location, + 'series': event_name, + } + + +class CiscoLiveSessionIE(CiscoLiveBaseIE): + _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/[^#]*#/session/(?P[^/?&]+)' + _TESTS = [{ + 'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs', + 'md5': 'c98acf395ed9c9f766941c70f5352e22', + 'info_dict': { + 'id': '5803694304001', + 'ext': 'mp4', + 'title': '13 Smart Automations to Monitor Your Cisco IOS Network', + 'description': 'md5:ec4a436019e09a918dec17714803f7cc', + 'timestamp': 1530305395, + 'upload_date': '20180629', + 'uploader_id': '5647924234001', + 'location': '16B Mezz.', + }, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.event=ciscoliveemea2019#/session/15361595531500013WOU', + 'only_matching': True, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?#/session/1490051371645001kNaS', + 'only_matching': True, + }] + + def _real_extract(self, url): + rf_id = self._match_id(url) + rf_result = self._call_api('session', rf_id, {'id': 
rf_id}, url) + return self._parse_rf_item(rf_result['items'][0]) + + +class CiscoLiveSearchIE(CiscoLiveBaseIE): + _VALID_URL = r'https?://(?:www\.)?ciscolive(?:\.cisco)?\.com/(?:global/)?on-demand-library(?:\.html|/)' + _TESTS = [{ + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/', + 'info_dict': { + 'title': 'Search query', + }, + 'playlist_count': 5, + }, { + 'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/', + 'only_matching': True, + }, { + 'url': 'https://www.ciscolive.com/global/on-demand-library.html?search.technicallevel=scpsSkillLevel_aintroductory&search.event=ciscoliveemea2019&search.technology=scpsTechnology_dataCenter&search.focus=scpsSessionFocus_bestPractices#/', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url) + + @staticmethod + def _check_bc_id_exists(rf_item): + return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None + + def _entries(self, query, url): + query['size'] = 50 + query['from'] = 0 + for page_num in itertools.count(1): + results = self._call_api( + 'search', None, query, url, + 'Downloading search JSON page %d' % page_num) + sl = try_get(results, lambda x: x['sectionList'][0], dict) + if sl: + results = sl + items = results.get('items') + if not items or not isinstance(items, list): + break + for item in items: + if not isinstance(item, dict): + continue + if not self._check_bc_id_exists(item): + continue + yield self._parse_rf_item(item) + size = int_or_none(results.get('size')) + if size is not None: + query['size'] = size + total = int_or_none(results.get('total')) + if total is not None and query['from'] + query['size'] > total: + break + query['from'] += query['size'] + + def _real_extract(self, url): + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + query['type'] = 'session' + return self.playlist_result( + self._entries(query, url), playlist_title='Search query') diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dl/extractor/cjsw.py new file mode 100644 index 000000000..505bdbe16 --- /dev/null +++ b/youtube_dl/extractor/cjsw.py @@ -0,0 +1,72 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + unescapeHTML, +) + + +class CJSWIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cjsw\.com/program/(?P[^/]+)/episode/(?P\d+)' + _TESTS = [{ + 'url': 'http://cjsw.com/program/freshly-squeezed/episode/20170620', + 'md5': 'cee14d40f1e9433632c56e3d14977120', + 'info_dict': { + 'id': '91d9f016-a2e7-46c5-8dcb-7cbcd7437c41', + 'ext': 'mp3', + 'title': 'Freshly Squeezed – Episode June 20, 2017', + 'description': 'md5:c967d63366c3898a80d0c7b0ff337202', + 'series': 'Freshly Squeezed', + 'episode_id': '20170620', + }, + }, { + # no description + 'url': 'http://cjsw.com/program/road-pops/episode/20170707/', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + program, episode_id = mobj.group('program', 'id') + audio_id = '%s/%s' % (program, episode_id) + + webpage = self._download_webpage(url, episode_id) + + title = unescapeHTML(self._search_regex( + 
(r']+class=["\']episode-header__title["\'][^>]*>(?P[^<]+)', + r'data-audio-title=(["\'])(?P<title>(?:(?!\1).)+)\1'), + webpage, 'title', group='title')) + + audio_url = self._search_regex( + r'<button[^>]+data-audio-src=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'audio url', group='url') + + audio_id = self._search_regex( + r'/([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})\.mp3', + audio_url, 'audio id', default=audio_id) + + formats = [{ + 'url': audio_url, + 'ext': determine_ext(audio_url, 'mp3'), + 'vcodec': 'none', + }] + + description = self._html_search_regex( + r'<p>(?P<description>.+?)</p>', webpage, 'description', + default=None) + series = self._search_regex( + r'data-showname=(["\'])(?P<name>(?:(?!\1).)+)\1', webpage, + 'series', default=program, group='name') + + return { + 'id': audio_id, + 'title': title, + 'description': description, + 'formats': formats, + 'series': series, + 'episode_id': episode_id, + } diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dl/extractor/cliphunter.py new file mode 100644 index 000000000..f2ca7a337 --- /dev/null +++ b/youtube_dl/extractor/cliphunter.py @@ -0,0 +1,79 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + url_or_none, +) + + +class CliphunterIE(InfoExtractor): + IE_NAME = 'cliphunter' + + _VALID_URL = r'''(?x)https?://(?:www\.)?cliphunter\.com/w/ + (?P<id>[0-9]+)/ + (?P<seo>.+?)(?:$|[#\?]) + ''' + _TESTS = [{ + 'url': 'http://www.cliphunter.com/w/1012420/Fun_Jynx_Maze_solo', + 'md5': 'b7c9bbd4eb3a226ab91093714dcaa480', + 'info_dict': { + 'id': '1012420', + 'ext': 'flv', + 'title': 'Fun Jynx Maze solo', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + }, + 'skip': 'Video gone', + }, { + 'url': 'http://www.cliphunter.com/w/2019449/ShesNew__My_booty_girlfriend_Victoria_Paradices_pussy_filled_with_jizz', + 'md5': '55a723c67bfc6da6b0cfa00d55da8a27', + 'info_dict': { + 'id': '2019449', + 'ext': 'mp4', + 'title': 'ShesNew - My booty girlfriend, Victoria Paradice\'s pussy filled with jizz', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_title = self._search_regex( + r'mediaTitle = "([^"]+)"', webpage, 'title') + + gexo_files = self._parse_json( + self._search_regex( + r'var\s+gexoFiles\s*=\s*({.+?});', webpage, 'gexo files'), + video_id) + + formats = [] + for format_id, f in gexo_files.items(): + video_url = url_or_none(f.get('url')) + if not video_url: + continue + fmt = f.get('fmt') + height = f.get('h') + format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': int_or_none(f.get('w')), + 'height': int_or_none(height), + 'tbr': int_or_none(f.get('br')), + }) + self._sort_formats(formats) + + thumbnail = self._search_regex( + r"var\s+mov_thumb\s*=\s*'([^']+)';", + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': video_title, + 'formats': formats, + 'age_limit': self._rta_search(webpage), + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/clippit.py b/youtube_dl/extractor/clippit.py new file mode 100644 index 000000000..a1a7a774c --- /dev/null +++ b/youtube_dl/extractor/clippit.py @@ -0,0 +1,74 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + qualities, +) + +import re + + 
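The qualities() helper imported above returns a ranking function in which format ids listed earlier rank lower. A rough self-contained equivalent (not necessarily the exact library code), shown with the sd/hd pair ClippitIE uses below:

def qualities(quality_ids):
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q

rank = qualities(('sd', 'hd'))
assert rank('sd') < rank('hd')    # 'hd' is preferred
assert rank('unknown') == -1      # unrecognized ids sort last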
+class ClippitIE(InfoExtractor): + + _VALID_URL = r'https?://(?:www\.)?clippituser\.tv/c/(?P<id>[a-z]+)' + _TEST = { + 'url': 'https://www.clippituser.tv/c/evmgm', + 'md5': '963ae7a59a2ec4572ab8bf2f2d2c5f09', + 'info_dict': { + 'id': 'evmgm', + 'ext': 'mp4', + 'title': 'Bye bye Brutus. #BattleBots - Clippit', + 'uploader': 'lizllove', + 'uploader_url': 'https://www.clippituser.tv/p/lizllove', + 'timestamp': 1472183818, + 'upload_date': '20160826', + 'description': 'BattleBots | ABC', + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'<title.*>(.+?)', webpage, 'title') + + FORMATS = ('sd', 'hd') + quality = qualities(FORMATS) + formats = [] + for format_id in FORMATS: + url = self._html_search_regex(r'data-%s-file="(.+?)"' % format_id, + webpage, 'url', fatal=False) + if not url: + continue + match = re.search(r'/(?P\d+)\.mp4', url) + formats.append({ + 'url': url, + 'format_id': format_id, + 'quality': quality(format_id), + 'height': int(match.group('height')) if match else None, + }) + + uploader = self._html_search_regex(r'class="username".*>\s+(.+?)\n', + webpage, 'uploader', fatal=False) + uploader_url = ('https://www.clippituser.tv/p/' + uploader + if uploader else None) + + timestamp = self._html_search_regex(r'datetime="(.+?)"', + webpage, 'date', fatal=False) + thumbnail = self._html_search_regex(r'data-image="(.+?)"', + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'uploader': uploader, + 'uploader_url': uploader_url, + 'timestamp': parse_iso8601(timestamp), + 'description': self._og_search_description(webpage), + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/cliprs.py b/youtube_dl/extractor/cliprs.py new file mode 100644 index 000000000..d55b26d59 --- /dev/null +++ b/youtube_dl/extractor/cliprs.py @@ -0,0 +1,33 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .onet import OnetBaseIE + + +class ClipRsIE(OnetBaseIE): + _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P[^/]+)/\d+' + _TEST = { + 'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732', + 'md5': 'c412d57815ba07b56f9edc7b5d6a14e5', + 'info_dict': { + 'id': '1488842.1399140381', + 'ext': 'mp4', + 'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli', + 'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026', + 'duration': 229, + 'timestamp': 1459850243, + 'upload_date': '20160405', + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + mvp_id = self._search_mvp_id(webpage) + + info_dict = self._extract_from_id(mvp_id, webpage) + info_dict['display_id'] = display_id + + return info_dict diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dl/extractor/clipsyndicate.py new file mode 100644 index 000000000..6cdb42f5a --- /dev/null +++ b/youtube_dl/extractor/clipsyndicate.py @@ -0,0 +1,54 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + find_xpath_attr, + fix_xml_ampersands +) + + +class ClipsyndicateIE(InfoExtractor): + _VALID_URL = r'https?://(?:chic|www)\.clipsyndicate\.com/video/play(list/\d+)?/(?P\d+)' + + _TESTS = [{ + 'url': 'http://www.clipsyndicate.com/video/play/4629301/brick_briscoe', + 'md5': '4d7d549451bad625e0ff3d7bd56d776c', + 'info_dict': { + 'id': '4629301', + 
'ext': 'mp4', + 'title': 'Brick Briscoe', + 'duration': 612, + 'thumbnail': r're:^https?://.+\.jpg', + }, + }, { + 'url': 'http://chic.clipsyndicate.com/video/play/5844117/shark_attack', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + js_player = self._download_webpage( + 'http://eplayer.clipsyndicate.com/embed/player.js?va_id=%s' % video_id, + video_id, 'Downlaoding player') + # it includes a required token + flvars = self._search_regex(r'flvars: "(.*?)"', js_player, 'flvars') + + pdoc = self._download_xml( + 'http://eplayer.clipsyndicate.com/osmf/playlist?%s' % flvars, + video_id, 'Downloading video info', + transform_source=fix_xml_ampersands) + + track_doc = pdoc.find('trackList/track') + + def find_param(name): + node = find_xpath_attr(track_doc, './/param', 'name', name) + if node is not None: + return node.attrib['value'] + + return { + 'id': video_id, + 'title': find_param('title'), + 'url': track_doc.find('location').text, + 'thumbnail': find_param('thumbnail'), + 'duration': int(find_param('duration')), + } diff --git a/youtube_dl/extractor/closertotruth.py b/youtube_dl/extractor/closertotruth.py new file mode 100644 index 000000000..26243d52d --- /dev/null +++ b/youtube_dl/extractor/closertotruth.py @@ -0,0 +1,92 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class CloserToTruthIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', + 'info_dict': { + 'id': '0_zof1ktre', + 'display_id': 'solutions-the-mind-body-problem', + 'ext': 'mov', + 'title': 'Solutions to the Mind-Body Problem?', + 'upload_date': '20140221', + 'timestamp': 1392956007, + 'uploader_id': 'CTTXML' + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://closertotruth.com/episodes/how-do-brains-work', + 'info_dict': { + 'id': '0_iuxai6g6', + 'display_id': 'how-do-brains-work', + 'ext': 'mov', + 'title': 'How do Brains Work?', + 'upload_date': '20140221', + 'timestamp': 1392956024, + 'uploader_id': 'CTTXML' + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://closertotruth.com/interviews/1725', + 'info_dict': { + 'id': '1725', + 'title': 'AyaFr-002', + }, + 'playlist_mincount': 2, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + partner_id = self._search_regex( + r']+src=["\'].*?\b(?:partner_id|p)/(\d+)', + webpage, 'kaltura partner_id') + + title = self._search_regex( + r'(.+?)\s*\|\s*.+?', webpage, 'video title') + + select = self._search_regex( + r'(?s)]+id="select-version"[^>]*>(.+?)', + webpage, 'select version', default=None) + if select: + entry_ids = set() + entries = [] + for mobj in re.finditer( + r']+value=(["\'])(?P[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P[^<]+)', + webpage): + entry_id = mobj.group('id') + if entry_id in entry_ids: + continue + entry_ids.add(entry_id) + entries.append({ + '_type': 'url_transparent', + 'url': 'kaltura:%s:%s' % (partner_id, entry_id), + 'ie_key': 'Kaltura', + 'title': mobj.group('title'), + }) + if entries: + return self.playlist_result(entries, display_id, title) + + entry_id = self._search_regex( + r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2', + webpage, 'kaltura entry_id', group='id') + + return { + '_type': 'url_transparent', + 'display_id': display_id, + 'url': 
'kaltura:%s:%s' % (partner_id, entry_id), + 'ie_key': 'Kaltura', + 'title': title + } diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py new file mode 100644 index 000000000..8ff2c6531 --- /dev/null +++ b/youtube_dl/extractor/cloudflarestream.py @@ -0,0 +1,63 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class CloudflareStreamIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/| + embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo= + ) + (?P<id>[\da-f]+) + ''' + _TESTS = [{ + 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717', + 'info_dict': { + 'id': '31c9291ab41fac05471db4e73aa11717', + 'ext': 'mp4', + 'title': '31c9291ab41fac05471db4e73aa11717', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1', + 'only_matching': True, + }, { + 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd', + 'only_matching': True, + }, { + 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1', + webpage)] + + def _real_extract(self, url): + video_id = self._match_id(url) + + formats = self._extract_m3u8_formats( + 'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id, + video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False) + formats.extend(self._extract_mpd_formats( + 'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id, + video_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dl/extractor/cloudy.py new file mode 100644 index 000000000..85ca20ecc --- /dev/null +++ b/youtube_dl/extractor/cloudy.py @@ -0,0 +1,60 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + str_to_int, + unified_strdate, +) + + +class CloudyIE(InfoExtractor): + _IE_DESC = 'cloudy.ec' + _VALID_URL = r'https?://(?:www\.)?cloudy\.ec/(?:v/|embed\.php\?.*?\bid=)(?P<id>[A-Za-z0-9]+)' + _TESTS = [{ + 'url': 'https://www.cloudy.ec/v/af511e2527aac', + 'md5': '29832b05028ead1b58be86bf319397ca', + 'info_dict': { + 'id': 'af511e2527aac', + 'ext': 'mp4', + 'title': 'Funny Cats and Animals Compilation june 2013', + 'upload_date': '20130913', + 'view_count': int, + } + }, { + 'url': 'http://www.cloudy.ec/embed.php?autoplay=1&id=af511e2527aac', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'https://www.cloudy.ec/embed.php', video_id, query={ + 'id': video_id, + 'playerPage': 1, + 'autoplay': 1, + }) + + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + + webpage = self._download_webpage( + 'https://www.cloudy.ec/v/%s' % video_id, video_id, fatal=False) + + if webpage: + info.update({ + 'title': self._search_regex( + r'<h\d[^>]*>([^<]+)<', webpage, 'title'), + 'upload_date': 
unified_strdate(self._search_regex( + r'>Published at (\d{4}-\d{1,2}-\d{1,2})', webpage, + 'upload date', fatal=False)), + 'view_count': str_to_int(self._search_regex( + r'([\d,.]+) views<', webpage, 'view count', fatal=False)), + }) + + if not info.get('title'): + info['title'] = video_id + + info['id'] = video_id + + return info diff --git a/youtube_dl/extractor/clubic.py b/youtube_dl/extractor/clubic.py new file mode 100644 index 000000000..98f9cb596 --- /dev/null +++ b/youtube_dl/extractor/clubic.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + qualities, +) + + +class ClubicIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?clubic\.com/video/(?:[^/]+/)*video.*-(?P<id>[0-9]+)\.html' + + _TESTS = [{ + 'url': 'http://www.clubic.com/video/clubic-week/video-clubic-week-2-0-le-fbi-se-lance-dans-la-photo-d-identite-448474.html', + 'md5': '1592b694ba586036efac1776b0b43cd3', + 'info_dict': { + 'id': '448474', + 'ext': 'mp4', + 'title': 'Clubic Week 2.0 : le FBI se lance dans la photo d\u0092identité', + 'description': 're:Gueule de bois chez Nokia. Le constructeur a indiqué cette.*', + 'thumbnail': r're:^http://img\.clubic\.com/.*\.jpg$', + } + }, { + 'url': 'http://www.clubic.com/video/video-clubic-week-2-0-apple-iphone-6s-et-plus-mais-surtout-le-pencil-469792.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + player_url = 'http://player.m6web.fr/v1/player/clubic/%s.html' % video_id + player_page = self._download_webpage(player_url, video_id) + + config = self._parse_json(self._search_regex( + r'(?m)M6\.Player\.config\s*=\s*(\{.+?\});$', player_page, + 'configuration'), video_id) + + video_info = config['videoInfo'] + sources = config['sources'] + quality_order = qualities(['sd', 'hq']) + + formats = [{ + 'format_id': src['streamQuality'], + 'url': src['src'], + 'quality': quality_order(src['streamQuality']), + } for src in sources] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_info['title'], + 'formats': formats, + 'description': clean_html(video_info.get('description')), + 'thumbnail': config.get('poster'), + } diff --git a/youtube_dl/extractor/clyp.py b/youtube_dl/extractor/clyp.py new file mode 100644 index 000000000..06d04de13 --- /dev/null +++ b/youtube_dl/extractor/clyp.py @@ -0,0 +1,82 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_parse_urlparse, +) +from ..utils import ( + float_or_none, + unified_timestamp, +) + + +class ClypIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?clyp\.it/(?P<id>[a-z0-9]+)' + _TESTS = [{ + 'url': 'https://clyp.it/ojz2wfah', + 'md5': '1d4961036c41247ecfdcc439c0cddcbb', + 'info_dict': { + 'id': 'ojz2wfah', + 'ext': 'mp3', + 'title': 'Krisson80 - bits wip wip', + 'description': '#Krisson80BitsWipWip #chiptune\n#wip', + 'duration': 263.21, + 'timestamp': 1443515251, + 'upload_date': '20150929', + }, + }, { + 'url': 'https://clyp.it/b04p1odi?token=b0078e077e15835845c528a44417719d', + 'info_dict': { + 'id': 'b04p1odi', + 'ext': 'mp3', + 'title': 'GJ! 
(Reward Edit)', + 'description': 'Metal Resistance (THE ONE edition)', + 'duration': 177.789, + 'timestamp': 1528241278, + 'upload_date': '20180605', + }, + 'params': { + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + audio_id = self._match_id(url) + + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + token = qs.get('token', [None])[0] + + query = {} + if token: + query['token'] = token + + metadata = self._download_json( + 'https://api.clyp.it/%s' % audio_id, audio_id, query=query) + + formats = [] + for secure in ('', 'Secure'): + for ext in ('Ogg', 'Mp3'): + format_id = '%s%s' % (secure, ext) + format_url = metadata.get('%sUrl' % format_id) + if format_url: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'vcodec': 'none', + }) + self._sort_formats(formats) + + title = metadata['Title'] + description = metadata.get('Description') + duration = float_or_none(metadata.get('Duration')) + timestamp = unified_timestamp(metadata.get('DateCreated')) + + return { + 'id': audio_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py new file mode 100644 index 000000000..e701fbeab --- /dev/null +++ b/youtube_dl/extractor/cmt.py @@ -0,0 +1,54 @@ +from __future__ import unicode_literals + +from .mtv import MTVIE + + +class CMTIE(MTVIE): + IE_NAME = 'cmt.com' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows|(?:full-)?episodes|video-clips)/(?P<id>[^/]+)' + + _TESTS = [{ + 'url': 'http://www.cmt.com/videos/garth-brooks/989124/the-call-featuring-trisha-yearwood.jhtml#artist=30061', + 'md5': 'e6b7ef3c4c45bbfae88061799bbba6c2', + 'info_dict': { + 'id': '989124', + 'ext': 'mp4', + 'title': 'Garth Brooks - "The Call (featuring Trisha Yearwood)"', + 'description': 'Blame It All On My Roots', + }, + 'skip': 'Video not available', + }, { + 'url': 'http://www.cmt.com/videos/misc/1504699/still-the-king-ep-109-in-3-minutes.jhtml#id=1739908', + 'md5': 'e61a801ca4a183a466c08bd98dccbb1c', + 'info_dict': { + 'id': '1504699', + 'ext': 'mp4', + 'title': 'Still The King Ep. 
109 in 3 Minutes', + 'description': 'Relive or catch up with Still The King by watching this recap of season 1, episode 9.', + 'timestamp': 1469421000.0, + 'upload_date': '20160725', + }, + }, { + 'url': 'http://www.cmt.com/shows/party-down-south/party-down-south-ep-407-gone-girl/1738172/playlist/#id=1738172', + 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/full-episodes/537qb3/nashville-the-wayfaring-stranger-season-5-ep-501', + 'only_matching': True, + }, { + 'url': 'http://www.cmt.com/video-clips/t9e4ci/nashville-juliette-in-2-minutes', + 'only_matching': True, + }] + + def _extract_mgid(self, webpage): + mgid = self._search_regex( + r'MTVN\.VIDEO\.contentUri\s*=\s*([\'"])(?P<mgid>.+?)\1', + webpage, 'mgid', group='mgid', default=None) + if not mgid: + mgid = self._extract_triforce_mgid(webpage) + return mgid + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + mgid = self._extract_mgid(webpage) + return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid) diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dl/extractor/cnbc.py new file mode 100644 index 000000000..6889b0f40 --- /dev/null +++ b/youtube_dl/extractor/cnbc.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class CNBCIE(InfoExtractor): + _VALID_URL = r'https?://video\.cnbc\.com/gallery/\?video=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://video.cnbc.com/gallery/?video=3000503714', + 'info_dict': { + 'id': '3000503714', + 'ext': 'mp4', + 'title': 'Fighting zombies is big business', + 'description': 'md5:0c100d8e1a7947bd2feec9a5550e519e', + 'timestamp': 1459332000, + 'upload_date': '20160330', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'url': smuggle_url( + 'http://link.theplatform.com/s/gZWlPC/media/guid/2408950221/%s?mbr=true&manifest=m3u' % video_id, + {'force_smil_url': True}), + 'id': video_id, + } + + +class CNBCVideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)' + _TEST = { + 'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html', + 'info_dict': { + 'id': '7000031301', + 'ext': 'mp4', + 'title': "Trump: I don't necessarily agree with raising rates", + 'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3', + 'timestamp': 1531958400, + 'upload_date': '20180719', + 'uploader': 'NBCU-CNBC', + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id, + 'video id') + return self.url_result( + 'http://video.cnbc.com/gallery/?video=%s' % video_id, + CNBCIE.ie_key()) diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py new file mode 100644 index 000000000..774b71055 --- /dev/null +++ b/youtube_dl/extractor/cnn.py @@ -0,0 +1,144 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .turner import TurnerBaseIE +from ..utils import url_basename + + +class CNNIE(TurnerBaseIE): + _VALID_URL = 
r'''(?x)https?://(?:(?P<sub_domain>edition|www|money)\.)?cnn\.com/(?:video/(?:data/.+?|\?)/)?videos?/ + (?P<path>.+?/(?P<title>[^/]+?)(?:\.(?:[a-z\-]+)|(?=&)))''' + + _TESTS = [{ + 'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', + 'md5': '3e6121ea48df7e2259fe73a0628605c4', + 'info_dict': { + 'id': 'sports/2013/06/09/nadal-1-on-1.cnn', + 'ext': 'mp4', + 'title': 'Nadal wins 8th French Open title', + 'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', + 'duration': 135, + 'upload_date': '20130609', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29', + 'md5': 'b5cc60c60a3477d185af8f19a2a26f4e', + 'info_dict': { + 'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', + 'ext': 'mp4', + 'title': "Student's epic speech stuns new freshmen", + 'description': "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", + 'upload_date': '20130821', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'http://www.cnn.com/video/data/2.0/video/living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln.html', + 'md5': 'f14d02ebd264df951feb2400e2c25a1b', + 'info_dict': { + 'id': 'living/2014/12/22/growing-america-nashville-salemtown-board-episode-1.hln', + 'ext': 'mp4', + 'title': 'Nashville Ep. 1: Hand crafted skateboards', + 'description': 'md5:e7223a503315c9f150acac52e76de086', + 'upload_date': '20141222', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + }, { + 'url': 'http://money.cnn.com/video/news/2016/08/19/netflix-stunning-stats.cnnmoney/index.html', + 'md5': '52a515dc1b0f001cd82e4ceda32be9d1', + 'info_dict': { + 'id': '/video/news/2016/08/19/netflix-stunning-stats.cnnmoney', + 'ext': 'mp4', + 'title': '5 stunning stats about Netflix', + 'description': 'Did you know that Netflix has more than 80 million members? 
Here are five facts about the online video distributor that you probably didn\'t know.', + 'upload_date': '20160819', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://cnn.com/video/?/video/politics/2015/03/27/pkg-arizona-senator-church-attendance-mandatory.ktvk', + 'only_matching': True, + }, { + 'url': 'http://cnn.com/video/?/video/us/2015/04/06/dnt-baker-refuses-anti-gay-order.wkmg', + 'only_matching': True, + }, { + 'url': 'http://edition.cnn.com/videos/arts/2016/04/21/olympic-games-cultural-a-z-brazil.cnn', + 'only_matching': True, + }] + + _CONFIG = { + # http://edition.cnn.com/.element/apps/cvp/3.0/cfg/spider/cnn/expansion/config.xml + 'edition': { + 'data_src': 'http://edition.cnn.com/video/data/3.0/video/%s/index.xml', + 'media_src': 'http://pmd.cdn.turner.com/cnn/big', + }, + # http://money.cnn.com/.element/apps/cvp2/cfg/config.xml + 'money': { + 'data_src': 'http://money.cnn.com/video/data/4.0/video/%s.xml', + 'media_src': 'http://ht3.cdn.turner.com/money/big', + }, + } + + def _extract_timestamp(self, video_data): + # TODO: fix timestamp extraction + return None + + def _real_extract(self, url): + sub_domain, path, page_title = re.match(self._VALID_URL, url).groups() + if sub_domain not in ('money', 'edition'): + sub_domain = 'edition' + config = self._CONFIG[sub_domain] + return self._extract_cvp_info( + config['data_src'] % path, page_title, { + 'default': { + 'media_src': config['media_src'], + } + }) + + +class CNNBlogsIE(InfoExtractor): + _VALID_URL = r'https?://[^\.]+\.blogs\.cnn\.com/.+' + _TEST = { + 'url': 'http://reliablesources.blogs.cnn.com/2014/02/09/criminalizing-journalism/', + 'md5': '3e56f97b0b6ffb4b79f4ea0749551084', + 'info_dict': { + 'id': 'bestoftv/2014/02/09/criminalizing-journalism.cnn', + 'ext': 'mp4', + 'title': 'Criminalizing journalism?', + 'description': 'Glenn Greenwald responds to comments made this week on Capitol Hill that journalists could be criminal accessories.', + 'upload_date': '20140209', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + 'add_ie': ['CNN'], + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, url_basename(url)) + cnn_url = self._html_search_regex(r'data-url="(.+?)"', webpage, 'cnn url') + return self.url_result(cnn_url, CNNIE.ie_key()) + + +class CNNArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:edition|www)\.)?cnn\.com/(?!videos?/)' + _TEST = { + 'url': 'http://www.cnn.com/2014/12/21/politics/obama-north-koreas-hack-not-war-but-cyber-vandalism/', + 'md5': '689034c2a3d9c6dc4aa72d65a81efd01', + 'info_dict': { + 'id': 'bestoftv/2014/12/21/ip-north-korea-obama.cnn', + 'ext': 'mp4', + 'title': 'Obama: Cyberattack not an act of war', + 'description': 'md5:0a802a40d2376f60e6b04c8d5bcebc4b', + 'upload_date': '20141221', + }, + 'expected_warnings': ['Failed to download m3u8 information'], + 'add_ie': ['CNN'], + } + + def _real_extract(self, url): + webpage = self._download_webpage(url, url_basename(url)) + cnn_url = self._html_search_regex(r"video:\s*'([^']+)'", webpage, 'cnn url') + return self.url_result('http://cnn.com/video/?/video/' + cnn_url, CNNIE.ie_key()) diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py new file mode 100644 index 000000000..588aad0d9 --- /dev/null +++ b/youtube_dl/extractor/comcarcoff.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + 
parse_duration, + parse_iso8601, +) + + +class ComCarCoffIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)' + _TESTS = [{ + 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/', + 'info_dict': { + 'id': '2494164', + 'ext': 'mp4', + 'upload_date': '20141127', + 'timestamp': 1417107600, + 'duration': 1232, + 'title': 'Happy Thanksgiving Miranda', + 'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.', + }, + 'params': { + 'skip_download': 'requires ffmpeg', + } + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + if not display_id: + display_id = 'comediansincarsgettingcoffee.com' + webpage = self._download_webpage(url, display_id) + + full_data = self._parse_json( + self._search_regex( + r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'), + display_id)['videoData'] + + display_id = full_data['activeVideo']['video'] + video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id] + + video_id = compat_str(video_data['mediaId']) + title = video_data['title'] + formats = self._extract_m3u8_formats( + video_data['mediaUrl'], video_id, 'mp4') + self._sort_formats(formats) + + thumbnails = [{ + 'url': video_data['images']['thumb'], + }, { + 'url': video_data['images']['poster'], + }] + + timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601( + video_data.get('pubDate')) + duration = int_or_none(video_data.get('durationSeconds')) or parse_duration( + video_data.get('duration')) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': video_data.get('description'), + 'timestamp': timestamp, + 'duration': duration, + 'thumbnails': thumbnails, + 'formats': formats, + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))), + } diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py new file mode 100644 index 000000000..d08b909a6 --- /dev/null +++ b/youtube_dl/extractor/comedycentral.py @@ -0,0 +1,142 @@ +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor +from .common import InfoExtractor + + +class ComedyCentralIE(MTVServicesInfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ + (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes))) + /(?P<title>.*)''' + _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother', + 'md5': 'c4f48e9eda1b16dd10add0744344b6d8', + 'info_dict': { + 'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354', + 'ext': 'mp4', + 'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother', + 'description': 'After a certain point, breastfeeding becomes c**kblocking.', + 'timestamp': 1376798400, + 'upload_date': '20130818', + }, + }, { + 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview', + 'only_matching': True, + }] + + +class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor): + _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/ + (?:full-episodes|shows(?=/[^/]+/full-episodes)) + 
/(?P<id>[^?]+)''' + _FEED_URL = 'http://comedycentral.com/feeds/mrss/' + + _TESTS = [{ + 'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028', + 'info_dict': { + 'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."', + 'title': 'November 28, 2016 - Ryan Speedo Green', + }, + 'playlist_count': 4, + }, { + 'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1') + videos_info = self._get_videos_info(mgid) + return videos_info + + +class ToshIE(MTVServicesInfoExtractor): + IE_DESC = 'Tosh.0' + _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)' + _FEED_URL = 'http://tosh.cc.com/feeds/mrss' + + _TESTS = [{ + 'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans', + 'info_dict': { + 'description': 'Tosh asked fans to share their summer plans.', + 'title': 'Twitter Users Share Summer Plans', + }, + 'playlist': [{ + 'md5': 'f269e88114c1805bb6d7653fecea9e06', + 'info_dict': { + 'id': '90498ec2-ed00-11e0-aca6-0026b9414f30', + 'ext': 'mp4', + 'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans', + 'description': 'Tosh asked fans to share their summer plans.', + 'thumbnail': r're:^https?://.*\.jpg', + # It's really reported to be published on year 2077 + 'upload_date': '20770610', + 'timestamp': 3390510600, + 'subtitles': { + 'en': 'mincount:3', + }, + }, + }] + }, { + 'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp', + 'only_matching': True, + }] + + +class ComedyCentralTVIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4', + 'info_dict': { + 'id': 'local_playlist-f99b626bdfe13568579a', + 'ext': 'flv', + 'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1', + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, { + 'url': 'http://www.comedycentral.tv/shows/1074-workaholics', + 'only_matching': True, + }, { + 'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + mrss_url = self._search_regex( + r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'mrss url', group='url') + + return self._get_videos_info_from_url(mrss_url, video_id) + + +class ComedyCentralShortnameIE(InfoExtractor): + _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$' + _TESTS = [{ + 'url': ':tds', + 'only_matching': True, + }, { + 'url': ':thedailyshow', + 'only_matching': True, + }, { + 'url': ':theopposition', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + shortcut_map = { + 'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes', + 'theopposition': 
'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes', + } + return self.url_result(shortcut_map[video_id]) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py new file mode 100644 index 000000000..9c3e9eec6 --- /dev/null +++ b/youtube_dl/extractor/common.py @@ -0,0 +1,2976 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import datetime +import hashlib +import json +import netrc +import os +import random +import re +import socket +import sys +import time +import math + +from ..compat import ( + compat_cookiejar, + compat_cookies, + compat_etree_Element, + compat_etree_fromstring, + compat_getpass, + compat_integer_types, + compat_http_client, + compat_os_name, + compat_str, + compat_urllib_error, + compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, + compat_urllib_request, + compat_urlparse, + compat_xml_parse_error, +) +from ..downloader.f4m import ( + get_base_url, + remove_encrypted_media, +) +from ..utils import ( + NO_DEFAULT, + age_restricted, + base_url, + bug_reports_message, + clean_html, + compiled_regex_type, + determine_ext, + determine_protocol, + dict_get, + error_to_compat_str, + ExtractorError, + extract_attributes, + fix_xml_ampersands, + float_or_none, + GeoRestrictedError, + GeoUtils, + int_or_none, + js_to_json, + JSON_LD_RE, + mimetype2ext, + orderedSet, + parse_bitrate, + parse_codecs, + parse_duration, + parse_iso8601, + parse_m3u8_attributes, + parse_resolution, + RegexNotFoundError, + sanitized_Request, + sanitize_filename, + str_or_none, + strip_or_none, + unescapeHTML, + unified_strdate, + unified_timestamp, + update_Request, + update_url_query, + urljoin, + url_basename, + url_or_none, + xpath_element, + xpath_text, + xpath_with_ns, +) + + +class InfoExtractor(object): + """Information Extractor class. + + Information extractors are the classes that, given a URL, extract + information about the video (or videos) the URL refers to. This + information includes the real video URL, the video title, author and + others. The information is stored in a dictionary which is then + passed to the YoutubeDL. The YoutubeDL processes this + information possibly downloading the video to the file system, among + other possible outcomes. + + The type field determines the type of the result. + By far the most common value (and the default if _type is missing) is + "video", which indicates a single video. + + For a video, the dictionaries must include the following fields: + + id: Video identifier. + title: Video title, unescaped. + + Additionally, it must contain either a formats entry or a url one: + + formats: A list of dictionaries for each format available, ordered + from worst to best quality. + + Potential fields: + * url The mandatory URL representing the media: + for plain file media - HTTP URL of this file, + for RTMP - RTMP URL, + for HLS - URL of the M3U8 media playlist, + for HDS - URL of the F4M manifest, + for DASH + - HTTP URL to plain file media (in case of + unfragmented media) + - URL of the MPD manifest or base URL + representing the media if MPD manifest + is parsed from a string (in case of + fragmented media) + for MSS - URL of the ISM manifest. + * manifest_url + The URL of the manifest file in case of + fragmented media: + for HLS - URL of the M3U8 master playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest, + for MSS - URL of the ISM manifest. 
+ * ext Will be calculated from URL if missing + * format A human-readable description of the format + ("mp4 container with h264/opus"). + Calculated from the format_id, width, height, + and format_note fields if missing. + * format_id A short description of the format + ("mp4_h264_opus" or "19"). + Technically optional, but strongly recommended. + * format_note Additional info about the format + ("3D" or "DASH video") + * width Width of the video, if known + * height Height of the video, if known + * resolution Textual description of width and height + * tbr Average bitrate of audio and video in KBit/s + * abr Average audio bitrate in KBit/s + * acodec Name of the audio codec in use + * asr Audio sampling rate in Hertz + * vbr Average video bitrate in KBit/s + * fps Frame rate + * vcodec Name of the video codec in use + * container Name of the container format + * filesize The number of bytes, if known in advance + * filesize_approx An estimate for the number of bytes + * player_url SWF Player URL (used for rtmpdump). + * protocol The protocol that will be used for the actual + download, lower-case. + "http", "https", "rtsp", "rtmp", "rtmpe", + "m3u8", "m3u8_native" or "http_dash_segments". + * fragment_base_url + Base URL for fragments. Each fragment's path + value (if present) will be relative to + this URL. + * fragments A list of fragments of a fragmented media. + Each fragment entry must contain either an url + or a path. If an url is present it should be + considered by a client. Otherwise both path and + fragment_base_url must be present. Here is + the list of all potential fields: + * "url" - fragment's URL + * "path" - fragment's path relative to + fragment_base_url + * "duration" (optional, int or float) + * "filesize" (optional, int) + * preference Order number of this format. If this field is + present and not None, the formats get sorted + by this field, regardless of all other values. + -1 for default (order by other properties), + -2 or smaller for less than default. + < -1000 to hide the format (if there is + another one which is strictly better) + * language Language code, e.g. "de" or "en-US". + * language_preference Is this in the language mentioned in + the URL? + 10 if it's what the URL is about, + -1 for default (don't know), + -10 otherwise, other values reserved for now. + * quality Order number of the video quality of this + format, irrespective of the file format. + -1 for default (order by other properties), + -2 or smaller for less than default. + * source_preference Order number for this video source + (quality takes higher priority) + -1 for default (order by other properties), + -2 or smaller for less than default. + * http_headers A dictionary of additional HTTP headers + to add to the request. + * stretched_ratio If given and not 1, indicates that the + video's pixels are not square. + width : height ratio as float. + * no_resume The server does not support resuming the + (HTTP or RTMP) download. Boolean. + * downloader_options A dictionary of downloader options as + described in FileDownloader + + url: Final video URL. + ext: Video filename extension. + format: The video format, defaults to ext (used for --get-format) + player_url: SWF Player URL (used for rtmpdump). + + The following fields are optional: + + alt_title: A secondary title of the video. + display_id An alternative identifier for the video, not necessarily + unique, but available before title. 
Typically, id is + something like "4234987", title "Dancing naked mole rats", + and display_id "dancing-naked-mole-rats" + thumbnails: A list of dictionaries, with the following entries: + * "id" (optional, string) - Thumbnail format ID + * "url" + * "preference" (optional, int) - quality of the image + * "width" (optional, int) + * "height" (optional, int) + * "resolution" (optional, string "{width}x{height}", + deprecated) + * "filesize" (optional, int) + thumbnail: Full URL to a video thumbnail image. + description: Full video description. + uploader: Full name of the video uploader. + license: License name the video is licensed under. + creator: The creator of the video. + release_date: The date (YYYYMMDD) when the video was released. + timestamp: UNIX timestamp of the moment the video became available. + upload_date: Video upload date (YYYYMMDD). + If not explicitly set, calculated from timestamp. + uploader_id: Nickname or id of the video uploader. + uploader_url: Full URL to a personal webpage of the video uploader. + channel: Full name of the channel the video is uploaded on. + Note that channel fields may or may not repeat uploader + fields. This depends on a particular extractor. + channel_id: Id of the channel. + channel_url: Full URL to a channel webpage. + location: Physical location where the video was filmed. + subtitles: The available subtitles as a dictionary in the format + {tag: subformats}. "tag" is usually a language code, and + "subformats" is a list sorted from lower to higher + preference, each element is a dictionary with the "ext" + entry and one of: + * "data": The subtitles file contents + * "url": A URL pointing to the subtitles file + "ext" will be calculated from URL if missing + automatic_captions: Like 'subtitles', used by the YoutubeIE for + automatically generated captions + duration: Length of the video in seconds, as an integer or float. + view_count: How many users have watched the video on the platform. + like_count: Number of positive ratings of the video + dislike_count: Number of negative ratings of the video + repost_count: Number of reposts of the video + average_rating: Average rating given by users, the scale used depends on the webpage + comment_count: Number of comments on the video + comments: A list of comments, each with one or more of the following + properties (all but one of text or html optional): + * "author" - human-readable name of the comment author + * "author_id" - user ID of the comment author + * "id" - Comment ID + * "html" - Comment as HTML + * "text" - Plain text of the comment + * "timestamp" - UNIX timestamp of comment + * "parent" - ID of the comment this one is replying to. + Set to "root" to indicate that this is a + comment to the original video. + age_limit: Age restriction for the video, as an integer (years) + webpage_url: The URL to the video webpage, if given to youtube-dl it + should allow to get the same result again. (It will be set + by YoutubeDL if it's missing) + categories: A list of categories that the video falls in, for example + ["Sports", "Berlin"] + tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"] + is_live: True, False, or None (=unknown). Whether this video is a + live stream that goes on instead of a fixed-length video. + start_time: Time in seconds where the reproduction should start, as + specified in the URL. + end_time: Time in seconds where the reproduction should end, as + specified in the URL. 
+ chapters: A list of dictionaries, with the following entries: + * "start_time" - The start time of the chapter in seconds + * "end_time" - The end time of the chapter in seconds + * "title" (optional, string) + + The following fields should only be used when the video belongs to some logical + chapter or section: + + chapter: Name or title of the chapter the video belongs to. + chapter_number: Number of the chapter the video belongs to, as an integer. + chapter_id: Id of the chapter the video belongs to, as a unicode string. + + The following fields should only be used when the video is an episode of some + series, programme or podcast: + + series: Title of the series or programme the video episode belongs to. + season: Title of the season the video episode belongs to. + season_number: Number of the season the video episode belongs to, as an integer. + season_id: Id of the season the video episode belongs to, as a unicode string. + episode: Title of the video episode. Unlike mandatory video title field, + this field should denote the exact title of the video episode + without any kind of decoration. + episode_number: Number of the video episode within a season, as an integer. + episode_id: Id of the video episode, as a unicode string. + + The following fields should only be used when the media is a track or a part of + a music album: + + track: Title of the track. + track_number: Number of the track within an album or a disc, as an integer. + track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii), + as a unicode string. + artist: Artist(s) of the track. + genre: Genre(s) of the track. + album: Title of the album the track belongs to. + album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc). + album_artist: List of all artists that appeared on the album (e.g. + "Ash Borer / Fell Voices" or "Various Artists", useful for splits + and compilations). + disc_number: Number of the disc or other physical medium the track belongs to, + as an integer. + release_year: Year (YYYY) when the album was released. + + Unless mentioned otherwise, the fields should be Unicode strings. + + Unless mentioned otherwise, None is equivalent to absence of information. + + + _type "playlist" indicates multiple videos. + There must be a key "entries", which is a list, an iterable, or a PagedList + object, each element of which is a valid dictionary by this specification. + + Additionally, playlists can have "id", "title", "description", "uploader", + "uploader_id", "uploader_url" attributes with the same semantics as videos + (see above). + + + _type "multi_video" indicates that there are multiple videos that + form a single show, for example, multiple acts of an opera or TV episode. + It must have an entries key like a playlist and contain all the keys + required for a video at the same time. + + + _type "url" indicates that the video must be extracted from another + location, possibly by a different extractor. Its only required key is: + "url" - the next URL to extract. + The key "ie_key" can be set to the class name (minus the trailing "IE", + e.g. "Youtube") if the extractor class is known in advance. + Additionally, the dictionary may have any properties of the resolved entity + known in advance, for example "title" if the title of the referred video is + known ahead of time. 
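+
+    As an illustrative sketch (not part of the formal specification), a
+    minimal "url" result, e.g. as built by the url_result() helper, could
+    look like:
+
+        {
+            '_type': 'url',
+            'ie_key': 'Youtube',
+            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
+        }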
+ + + _type "url_transparent" entities have the same specification as "url", but + indicate that the given additional information is more precise than the one + associated with the resolved URL. + This is useful when a site employs a video service that hosts the video and + its technical metadata, but that video service does not embed a useful + title, description etc. + + + Subclasses of this one should re-define the _real_initialize() and + _real_extract() methods and define a _VALID_URL regexp. + Probably, they should also be added to the list of extractors. + + _GEO_BYPASS attribute may be set to False in order to disable + geo restriction bypass mechanisms for a particular extractor. + Though it won't disable explicit geo restriction bypass based on + country code provided with geo_bypass_country. + + _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted + countries for this extractor. One of these countries will be used by + geo restriction bypass mechanism right away in order to bypass + geo restriction, of course, if the mechanism is not disabled. + + _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted + IP blocks in CIDR notation for this extractor. One of these IP blocks + will be used by geo restriction bypass mechanism similarly + to _GEO_COUNTRIES. + + Finally, the _WORKING attribute should be set to False for broken IEs + in order to warn the users and skip the tests. + """ + + _ready = False + _downloader = None + _x_forwarded_for_ip = None + _GEO_BYPASS = True + _GEO_COUNTRIES = None + _GEO_IP_BLOCKS = None + _WORKING = True + + def __init__(self, downloader=None): + """Constructor. Receives an optional downloader.""" + self._ready = False + self._x_forwarded_for_ip = None + self.set_downloader(downloader) + + @classmethod + def suitable(cls, url): + """Receives a URL and returns True if suitable for this IE.""" + + # This does not use has/getattr intentionally - we want to know whether + # we have cached the regexp for *this* class, whereas getattr would also + # match the superclass + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + return cls._VALID_URL_RE.match(url) is not None + + @classmethod + def _match_id(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return compat_str(m.group('id')) + + @classmethod + def working(cls): + """Getter method for _WORKING.""" + return cls._WORKING + + def initialize(self): + """Initializes an instance (authentication, etc).""" + self._initialize_geo_bypass({ + 'countries': self._GEO_COUNTRIES, + 'ip_blocks': self._GEO_IP_BLOCKS, + }) + if not self._ready: + self._real_initialize() + self._ready = True + + def _initialize_geo_bypass(self, geo_bypass_context): + """ + Initialize geo restriction bypass mechanism. + + This method is used to initialize geo bypass mechanism based on faking + X-Forwarded-For HTTP header. A random country from provided country list + is selected and a random IP belonging to this country is generated. This + IP will be passed as X-Forwarded-For HTTP header in all subsequent + HTTP requests. + + This method will be used for initial geo bypass mechanism initialization + during the instance initialization with _GEO_COUNTRIES and + _GEO_IP_BLOCKS. + + You may also manually call it from extractor's code if geo bypass + information is not available beforehand (e.g. obtained during + extraction) or due to some other reason. 
In this case you should pass + this information in geo bypass context passed as first argument. It may + contain following fields: + + countries: List of geo unrestricted countries (similar + to _GEO_COUNTRIES) + ip_blocks: List of geo unrestricted IP blocks in CIDR notation + (similar to _GEO_IP_BLOCKS) + + """ + if not self._x_forwarded_for_ip: + + # Geo bypass mechanism is explicitly disabled by user + if not self._downloader.params.get('geo_bypass', True): + return + + if not geo_bypass_context: + geo_bypass_context = {} + + # Backward compatibility: previously _initialize_geo_bypass + # expected a list of countries, some 3rd party code may still use + # it this way + if isinstance(geo_bypass_context, (list, tuple)): + geo_bypass_context = { + 'countries': geo_bypass_context, + } + + # The whole point of geo bypass mechanism is to fake IP + # as X-Forwarded-For HTTP header based on some IP block or + # country code. + + # Path 1: bypassing based on IP block in CIDR notation + + # Explicit IP block specified by user, use it right away + # regardless of whether extractor is geo bypassable or not + ip_block = self._downloader.params.get('geo_bypass_ip_block', None) + + # Otherwise use random IP block from geo bypass context but only + # if extractor is known as geo bypassable + if not ip_block: + ip_blocks = geo_bypass_context.get('ip_blocks') + if self._GEO_BYPASS and ip_blocks: + ip_block = random.choice(ip_blocks) + + if ip_block: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block) + if self._downloader.params.get('verbose', False): + self._downloader.to_screen( + '[debug] Using fake IP %s as X-Forwarded-For.' + % self._x_forwarded_for_ip) + return + + # Path 2: bypassing based on country code + + # Explicit country code specified by user, use it right away + # regardless of whether extractor is geo bypassable or not + country = self._downloader.params.get('geo_bypass_country', None) + + # Otherwise use random country code from geo bypass context but + # only if extractor is known as geo bypassable + if not country: + countries = geo_bypass_context.get('countries') + if self._GEO_BYPASS and countries: + country = random.choice(countries) + + if country: + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country) + if self._downloader.params.get('verbose', False): + self._downloader.to_screen( + '[debug] Using fake IP %s (%s) as X-Forwarded-For.' 
+ % (self._x_forwarded_for_ip, country.upper())) + + def extract(self, url): + """Extracts URL information and returns it in list of dicts.""" + try: + for _ in range(2): + try: + self.initialize() + ie_result = self._real_extract(url) + if self._x_forwarded_for_ip: + ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip + return ie_result + except GeoRestrictedError as e: + if self.__maybe_fake_ip_and_retry(e.countries): + continue + raise + except ExtractorError: + raise + except compat_http_client.IncompleteRead as e: + raise ExtractorError('A network error has occurred.', cause=e, expected=True) + except (KeyError, StopIteration) as e: + raise ExtractorError('An extractor error has occurred.', cause=e) + + def __maybe_fake_ip_and_retry(self, countries): + if (not self._downloader.params.get('geo_bypass_country', None) + and self._GEO_BYPASS + and self._downloader.params.get('geo_bypass', True) + and not self._x_forwarded_for_ip + and countries): + country_code = random.choice(countries) + self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) + if self._x_forwarded_for_ip: + self.report_warning( + 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.' + % (self._x_forwarded_for_ip, country_code.upper())) + return True + return False + + def set_downloader(self, downloader): + """Sets the downloader for this IE.""" + self._downloader = downloader + + def _real_initialize(self): + """Real initialization process. Redefine in subclasses.""" + pass + + def _real_extract(self, url): + """Real extraction process. Redefine in subclasses.""" + pass + + @classmethod + def ie_key(cls): + """A string for getting the InfoExtractor with get_info_extractor""" + return compat_str(cls.__name__[:-2]) + + @property + def IE_NAME(self): + return compat_str(type(self).__name__[:-2]) + + @staticmethod + def __can_accept_status_code(err, expected_status): + assert isinstance(err, compat_urllib_error.HTTPError) + if expected_status is None: + return False + if isinstance(expected_status, compat_integer_types): + return err.code == expected_status + elif isinstance(expected_status, (list, tuple)): + return err.code in expected_status + elif callable(expected_status): + return expected_status(err.code) is True + else: + assert False + + def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None): + """ + Return the response handle. + + See _download_webpage docstring for arguments specification. + """ + if note is None: + self.report_download_webpage(video_id) + elif note is not False: + if video_id is None: + self.to_screen('%s' % (note,)) + else: + self.to_screen('%s: %s' % (video_id, note)) + + # Some sites check X-Forwarded-For HTTP header in order to figure out + # the origin of the client behind proxy. This allows bypassing geo + # restriction by faking this header's value to IP that belongs to some + # geo unrestricted country. We will do so once we encounter any + # geo restriction error. 
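+        # Illustrative example (the address below is hypothetical): with
+        # geo_bypass_country 'DE', _initialize_geo_bypass() might have stored
+        # something like '53.98.12.171' in self._x_forwarded_for_ip, so the
+        # request built below would then carry the header
+        #     X-Forwarded-For: 53.98.12.171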
+ if self._x_forwarded_for_ip: + if 'X-Forwarded-For' not in headers: + headers['X-Forwarded-For'] = self._x_forwarded_for_ip + + if isinstance(url_or_request, compat_urllib_request.Request): + url_or_request = update_Request( + url_or_request, data=data, headers=headers, query=query) + else: + if query: + url_or_request = update_url_query(url_or_request, query) + if data is not None or headers: + url_or_request = sanitized_Request(url_or_request, data, headers) + try: + return self._downloader.urlopen(url_or_request) + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + if isinstance(err, compat_urllib_error.HTTPError): + if self.__can_accept_status_code(err, expected_status): + # Retain reference to error to prevent file object from + # being closed before it can be read. Works around the + # effects of <https://bugs.python.org/issue15002> + # introduced in Python 3.4.1. + err.fp._error = err + return err.fp + + if errnote is False: + return False + if errnote is None: + errnote = 'Unable to download webpage' + + errmsg = '%s: %s' % (errnote, error_to_compat_str(err)) + if fatal: + raise ExtractorError(errmsg, sys.exc_info()[2], cause=err) + else: + self._downloader.report_warning(errmsg) + return False + + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): + """ + Return a tuple (page content as string, URL handle). + + See _download_webpage docstring for arguments specification. + """ + # Strip hashes from the URL (#1038) + if isinstance(url_or_request, (compat_str, str)): + url_or_request = url_or_request.partition('#')[0] + + urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status) + if urlh is False: + assert not fatal + return False + content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding) + return (content, urlh) + + @staticmethod + def _guess_encoding_from_content(content_type, webpage_bytes): + m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type) + if m: + encoding = m.group(1) + else: + m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]', + webpage_bytes[:1024]) + if m: + encoding = m.group(1).decode('ascii') + elif webpage_bytes.startswith(b'\xff\xfe'): + encoding = 'utf-16' + else: + encoding = 'utf-8' + + return encoding + + def __check_blocked(self, content): + first_block = content[:512] + if ('<title>Access to this site is blocked' in content + and 'Websense' in first_block): + msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' 
+ blocked_iframe = self._html_search_regex( + r'' + + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root', default=None) + if xml_root is None: + # Probably need to authenticate + login_res = self._login(webpage_url, display_id) + if login_res is None: + self.report_warning('Could not login.') + else: + start_page = login_res + # Grab the url from the authenticated page + xml_root = self._html_search_regex( + PLAYER_REGEX, start_page, 'xml root') + + xml_name = self._html_search_regex( + r'', webpage): + url = self._search_regex( + r'src=(["\'])(?P.+?partnerplayer.+?)\1', iframe, + 'player URL', default=None, group='url') + if url: + break + + if not url: + url = self._og_search_url(webpage) + + mobj = re.match( + self._VALID_URL, self._proto_relative_url(url.strip())) + + player_id = mobj.group('player_id') + if not display_id: + display_id = player_id + if player_id: + player_page = self._download_webpage( + url, display_id, note='Downloading player page', + errnote='Could not download player page') + video_id = self._search_regex( + r'\d+)' + _TEST = { + 'url': 'http://www.pearvideo.com/video_1076290', + 'info_dict': { + 'id': '1076290', + 'ext': 'mp4', + 'title': '小浣熊在主人家玻璃上滚石头:没砸', + 'description': 'md5:01d576b747de71be0ee85eb7cac25f9d', + 'timestamp': 1494275280, + 'upload_date': '20170508', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + quality = qualities( + ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src')) + + formats = [{ + 'url': mobj.group('url'), + 'format_id': mobj.group('id'), + 'quality': quality(mobj.group('id')), + } for mobj in re.finditer( + r'(?P[a-zA-Z]+)Url\s*=\s*(["\'])(?P(?:https?:)?//.+?)\2', + webpage)] + self._sort_formats(formats) + + title = self._search_regex( + (r']+\bclass=(["\'])video-tt\1[^>]*>(?P[^<]+)', + r'<[^>]+\bdata-title=(["\'])(?P(?:(?!\1).)+)\1'), + webpage, 'title', group='value') + description = self._search_regex( + (r']+\bclass=(["\'])summary\1[^>]*>(?P[^<]+)', + r'<[^>]+\bdata-summary=(["\'])(?P(?:(?!\1).)+)\1'), + webpage, 'description', default=None, + group='value') or self._html_search_meta('Description', webpage) + timestamp = unified_timestamp(self._search_regex( + r']+\bclass=["\']date["\'][^>]*>([^<]+)', + webpage, 'timestamp', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py new file mode 100644 index 000000000..e03c3d1d3 --- /dev/null +++ b/youtube_dl/extractor/peertube.py @@ -0,0 +1,250 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + parse_resolution, + try_get, + unified_timestamp, + url_or_none, + urljoin, +) + + +class PeerTubeIE(InfoExtractor): + _INSTANCES_RE = r'''(?: + # Taken from https://instances.joinpeertube.org/instances + tube\.openalgeria\.org| + peertube\.pointsecu\.fr| + peertube\.nogafa\.org| + peertube\.pl| + megatube\.lilomoino\.fr| + peertube\.tamanoir\.foucry\.net| + peertube\.inapurna\.org| + peertube\.netzspielplatz\.de| + video\.deadsuperhero\.com| + peertube\.devosi\.org| + peertube\.1312\.media| + tube\.worldofhauru\.xyz| + tube\.bootlicker\.party| + skeptikon\.fr| + peertube\.geekshell\.fr| + tube\.opportunis\.me| + peertube\.peshane\.net| + video\.blueline\.mg| + tube\.homecomputing\.fr| 
+ videos\.cloudfrancois\.fr| + peertube\.viviers-fibre\.net| + tube\.ouahpiti\.info| + video\.tedomum\.net| + video\.g3l\.org| + fontube\.fr| + peertube\.gaialabs\.ch| + peertube\.extremely\.online| + peertube\.public-infrastructure\.eu| + tube\.kher\.nl| + peertube\.qtg\.fr| + tube\.22decembre\.eu| + facegirl\.me| + video\.migennes\.net| + janny\.moe| + tube\.p2p\.legal| + video\.atlanti\.se| + troll\.tv| + peertube\.geekael\.fr| + vid\.leotindall\.com| + video\.anormallostpod\.ovh| + p-tube\.h3z\.jp| + tube\.darfweb\.eu| + videos\.iut-orsay\.fr| + peertube\.solidev\.net| + videos\.symphonie-of-code\.fr| + testtube\.ortg\.de| + videos\.cemea\.org| + peertube\.gwendalavir\.eu| + video\.passageenseine\.fr| + videos\.festivalparminous\.org| + peertube\.touhoppai\.moe| + peertube\.duckdns\.org| + sikke\.fi| + peertube\.mastodon\.host| + firedragonvideos\.com| + vidz\.dou\.bet| + peertube\.koehn\.com| + peer\.hostux\.social| + share\.tube| + peertube\.walkingmountains\.fr| + medias\.libox\.fr| + peertube\.moe| + peertube\.xyz| + jp\.peertube\.network| + videos\.benpro\.fr| + tube\.otter\.sh| + peertube\.angristan\.xyz| + peertube\.parleur\.net| + peer\.ecutsa\.fr| + peertube\.heraut\.eu| + peertube\.tifox\.fr| + peertube\.maly\.io| + vod\.mochi\.academy| + exode\.me| + coste\.video| + tube\.aquilenet\.fr| + peertube\.gegeweb\.eu| + framatube\.org| + thinkerview\.video| + tube\.conferences-gesticulees\.net| + peertube\.datagueule\.tv| + video\.lqdn\.fr| + meilleurtube\.delire\.party| + tube\.mochi\.academy| + peertube\.dav\.li| + media\.zat\.im| + pytu\.be| + peertube\.valvin\.fr| + peertube\.nsa\.ovh| + video\.colibris-outilslibres\.org| + video\.hispagatos\.org| + tube\.svnet\.fr| + peertube\.video| + videos\.lecygnenoir\.info| + peertube3\.cpy\.re| + peertube2\.cpy\.re| + videos\.tcit\.fr| + peertube\.cpy\.re + )''' + _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' + _VALID_URL = r'''(?x) + (?: + peertube:(?P<host>[^:]+):| + https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/ + ) + (?P<id>%s) + ''' % (_INSTANCES_RE, _UUID_RE) + _TESTS = [{ + 'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c', + 'md5': '80f24ff364cc9d333529506a263e7feb', + 'info_dict': { + 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c', + 'ext': 'mp4', + 'title': 'wow', + 'description': 'wow such video, so gif', + 'thumbnail': r're:https?://.*\.(?:jpg|png)', + 'timestamp': 1519297480, + 'upload_date': '20180222', + 'uploader': 'Luclu7', + 'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1', + 'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7', + 'license': 'Unknown', + 'duration': 3, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'tags': list, + 'categories': list, + } + }, { + 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44', + 'only_matching': True, + }, { + # nsfw + 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39', + 'only_matching': True, + }, { + 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7', + 'only_matching': True, + }, { + 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8', + 'only_matching': True, + }, { + 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205', + 'only_matching': True, + }] + + @staticmethod + def _extract_peertube_url(webpage, source_url): + mobj = re.match( + r'https?://(?P<host>[^/]+)/videos/watch/(?P<id>%s)' + % PeerTubeIE._UUID_RE, 
source_url) + if mobj and any(p in webpage for p in ( + 'PeerTube<', + 'There will be other non JS-based clients to access PeerTube', + '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')): + return 'peertube:%s:%s' % mobj.group('host', 'id') + + @staticmethod + def _extract_urls(webpage, source_url): + entries = re.findall( + r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)''' + % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage) + if not entries: + peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url) + if peertube_url: + entries = [peertube_url] + return entries + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') or mobj.group('host_2') + video_id = mobj.group('id') + + video = self._download_json( + 'https://%s/api/v1/videos/%s' % (host, video_id), video_id) + + title = video['name'] + + formats = [] + for file_ in video['files']: + if not isinstance(file_, dict): + continue + file_url = url_or_none(file_.get('fileUrl')) + if not file_url: + continue + file_size = int_or_none(file_.get('size')) + format_id = try_get( + file_, lambda x: x['resolution']['label'], compat_str) + f = parse_resolution(format_id) + f.update({ + 'url': file_url, + 'format_id': format_id, + 'filesize': file_size, + }) + formats.append(f) + self._sort_formats(formats) + + def account_data(field): + return try_get(video, lambda x: x['account'][field], compat_str) + + category = try_get(video, lambda x: x['category']['label'], compat_str) + categories = [category] if category else None + + nsfw = video.get('nsfw') + if isinstance(nsfw, bool): + age_limit = 18 if nsfw else 0 + else: + age_limit = None + + return { + 'id': video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': urljoin(url, video.get('thumbnailPath')), + 'timestamp': unified_timestamp(video.get('publishedAt')), + 'uploader': account_data('displayName'), + 'uploader_id': account_data('uuid'), + 'uploader_url': account_data('url'), + 'license': try_get( + video, lambda x: x['licence']['label'], compat_str), + 'duration': int_or_none(video.get('duration')), + 'view_count': int_or_none(video.get('views')), + 'like_count': int_or_none(video.get('likes')), + 'dislike_count': int_or_none(video.get('dislikes')), + 'age_limit': age_limit, + 'tags': try_get(video, lambda x: x['tags'], list), + 'categories': categories, + 'formats': formats, + } diff --git a/youtube_dl/extractor/people.py b/youtube_dl/extractor/people.py new file mode 100644 index 000000000..6ca95715e --- /dev/null +++ b/youtube_dl/extractor/people.py @@ -0,0 +1,32 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class PeopleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?people\.com/people/videos/0,,(?P<id>\d+),00\.html' + + _TEST = { + 'url': 'http://www.people.com/people/videos/0,,20995451,00.html', + 'info_dict': { + 'id': 'ref:20995451', + 'ext': 'mp4', + 'title': 'Astronaut Love Triangle Victim Speaks Out: “The Crime in 2007 Hasn’t Defined Us”', + 'description': 'Colleen Shipman speaks to PEOPLE for the first time about life after the attack', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 246.318, + 'timestamp': 1458720585, + 'upload_date': '20160323', + 'uploader_id': '416418724', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['BrightcoveNew'], + } + + def _real_extract(self, url): + return self.url_result( + 
'http://players.brightcove.net/416418724/default_default/index.html?videoId=ref:%s' + % self._match_id(url), 'BrightcoveNew') diff --git a/youtube_dl/extractor/performgroup.py b/youtube_dl/extractor/performgroup.py new file mode 100644 index 000000000..26942bfb3 --- /dev/null +++ b/youtube_dl/extractor/performgroup.py @@ -0,0 +1,83 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PerformGroupIE(InfoExtractor): + _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})' + _TESTS = [{ + # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html + 'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab', + 'md5': '259cb03d142e2e52471e8837ecacb29f', + 'info_dict': { + 'id': 'xgrwobuzumes1lwjxtcdpwgxd', + 'ext': 'mp4', + 'title': 'Liga MX: Keine Einsicht nach Horrorfoul', + 'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b', + 'timestamp': 1511533477, + 'upload_date': '20171124', + } + }] + + def _call_api(self, service, auth_token, content_id, referer_url): + return self._download_json( + 'http://ep3.performfeeds.com/ep%s/%s/%s/' % (service, auth_token, content_id), + content_id, headers={ + 'Referer': referer_url, + 'Origin': 'http://player.performgroup.com', + }, query={ + '_fmt': 'json', + }) + + def _real_extract(self, url): + player_id, auth_token = re.search(self._VALID_URL, url).groups() + bootstrap = self._call_api('bootstrap', auth_token, player_id, url) + video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0] + video_id = video['uuid'] + vod = self._call_api('vod', auth_token, video_id, url) + media = vod['videos']['video'][0]['media'] + + formats = [] + hls_url = media.get('hls', {}).get('url') + if hls_url: + formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + hds_url = media.get('hds', {}).get('url') + if hds_url: + formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False)) + + for c in media.get('content', []): + c_url = c.get('url') + if not c_url: + continue + tbr = int_or_none(c.get('bitrate'), 1000) + format_id = 'http' + if tbr: + format_id += '-%d' % tbr + formats.append({ + 'format_id': format_id, + 'url': c_url, + 'tbr': tbr, + 'width': int_or_none(c.get('width')), + 'height': int_or_none(c.get('height')), + 'filesize': int_or_none(c.get('fileSize')), + 'vcodec': c.get('type'), + 'fps': int_or_none(c.get('videoFrameRate')), + 'vbr': int_or_none(c.get('videoRate'), 1000), + 'abr': int_or_none(c.get('audioRate'), 1000), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video['title'], + 'description': video.get('description'), + 'thumbnail': video.get('poster'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': int_or_none(video.get('publishedTime'), 1000), + 'formats': formats, + } diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py new file mode 100644 index 000000000..b337a56c0 --- /dev/null +++ b/youtube_dl/extractor/periscope.py @@ -0,0 +1,171 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, + unescapeHTML, +) + + +class PeriscopeBaseIE(InfoExtractor): + def _call_api(self, 
method, query, item_id): + return self._download_json( + 'https://api.periscope.tv/api/v2/%s' % method, + item_id, query=query) + + +class PeriscopeIE(PeriscopeBaseIE): + IE_DESC = 'Periscope' + IE_NAME = 'periscope' + _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)' + # Alive example URLs can be found here http://onperiscope.com/ + _TESTS = [{ + 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==', + 'md5': '65b57957972e503fcbbaeed8f4fa04ca', + 'info_dict': { + 'id': '56102209', + 'ext': 'mp4', + 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗', + 'timestamp': 1438978559, + 'upload_date': '20150807', + 'uploader': 'Bec Boop', + 'uploader_id': '1465763', + }, + 'skip': 'Expires in 24 hours', + }, { + 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv', + 'only_matching': True, + }, { + 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX', + 'only_matching': True, + }, { + 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv', + 'only_matching': True, + }] + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + token = self._match_id(url) + + stream = self._call_api( + 'accessVideoPublic', {'broadcast_id': token}, token) + + broadcast = stream['broadcast'] + title = broadcast['status'] + + uploader = broadcast.get('user_display_name') or broadcast.get('username') + uploader_id = (broadcast.get('user_id') or broadcast.get('username')) + + title = '%s - %s' % (uploader, title) if uploader else title + state = broadcast.get('state').lower() + if state == 'running': + title = self._live_title(title) + timestamp = parse_iso8601(broadcast.get('created_at')) + + thumbnails = [{ + 'url': broadcast[image], + } for image in ('image_url', 'image_url_small') if broadcast.get(image)] + + width = int_or_none(broadcast.get('width')) + height = int_or_none(broadcast.get('height')) + + def add_width_and_height(f): + for key, val in (('width', width), ('height', height)): + if not f.get(key): + f[key] = val + + video_urls = set() + formats = [] + for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'): + video_url = stream.get(format_id + '_url') + if not video_url or video_url in video_urls: + continue + video_urls.add(video_url) + if format_id != 'rtmp': + m3u8_formats = self._extract_m3u8_formats( + video_url, token, 'mp4', + entry_protocol='m3u8_native' + if state in ('ended', 'timed_out') else 'm3u8', + m3u8_id=format_id, fatal=False) + if len(m3u8_formats) == 1: + add_width_and_height(m3u8_formats[0]) + formats.extend(m3u8_formats) + continue + rtmp_format = { + 'url': video_url, + 'ext': 'flv' if format_id == 'rtmp' else 'mp4', + } + add_width_and_height(rtmp_format) + formats.append(rtmp_format) + self._sort_formats(formats) + + return { + 'id': broadcast.get('id') or token, + 'title': title, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'thumbnails': thumbnails, + 'formats': formats, + } + + +class PeriscopeUserIE(PeriscopeBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$' + IE_DESC = 'Periscope user videos' + IE_NAME = 'periscope:user' + + _TEST = { + 'url': 'https://www.periscope.tv/LularoeHusbandMike/', + 'info_dict': { + 'id': 'LularoeHusbandMike', + 'title': 'LULAROE 
HUSBAND MIKE', + 'description': 'md5:6cf4ec8047768098da58e446e82c82f0', + }, + # Periscope only shows videos in the last 24 hours, so it's possible to + # get 0 videos + 'playlist_mincount': 0, + } + + def _real_extract(self, url): + user_name = self._match_id(url) + + webpage = self._download_webpage(url, user_name) + + data_store = self._parse_json( + unescapeHTML(self._search_regex( + r'data-store=(["\'])(?P<data>.+?)\1', + webpage, 'data store', default='{}', group='data')), + user_name) + + user = list(data_store['UserCache']['users'].values())[0]['user'] + user_id = user['id'] + session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id'] + + broadcasts = self._call_api( + 'getUserBroadcastsPublic', + {'user_id': user_id, 'session_id': session_id}, + user_name)['broadcasts'] + + broadcast_ids = [ + broadcast['id'] for broadcast in broadcasts if broadcast.get('id')] + + title = user.get('display_name') or user.get('username') or user_name + description = user.get('description') + + entries = [ + self.url_result( + 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id)) + for broadcast_id in broadcast_ids] + + return self.playlist_result(entries, user_id, title, description) diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dl/extractor/philharmoniedeparis.py new file mode 100644 index 000000000..f723a2b3b --- /dev/null +++ b/youtube_dl/extractor/philharmoniedeparis.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + try_get, + urljoin, +) + + +class PhilharmonieDeParisIE(InfoExtractor): + IE_DESC = 'Philharmonie de Paris' + _VALID_URL = r'''(?x) + https?:// + (?: + live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)| + pad\.philharmoniedeparis\.fr/doc/CIMU/ + ) + (?P<id>\d+) + ''' + _TESTS = [{ + 'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower', + 'md5': 'a0a4b195f544645073631cbec166a2c2', + 'info_dict': { + 'id': '1086697', + 'ext': 'mp4', + 'title': 'Jazz à la Villette : Knower', + }, + }, { + 'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html', + 'info_dict': { + 'id': '1032066', + 'title': 'md5:0a031b81807b3593cffa3c9a87a167a0', + }, + 'playlist_mincount': 2, + }, { + 'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html', + 'only_matching': True, + }, { + 'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr', + 'only_matching': True, + }] + _LIVE_URL = 'https://live.philharmoniedeparis.fr' + + def _real_extract(self, url): + video_id = self._match_id(url) + + config = self._download_json( + '%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={ + 'id': video_id, + 'lang': 'fr-FR', + }) + + def extract_entry(source): + if not isinstance(source, dict): + return + title = source.get('title') + if not title: + return + files = source.get('files') + if not isinstance(files, dict): + return + format_urls = set() + formats = [] + for format_id in ('mobile', 'desktop'): + format_url = try_get( + files, lambda x: x[format_id]['file'], compat_str) + if not format_url or format_url in format_urls: + continue + format_urls.add(format_url) + m3u8_url = urljoin(self._LIVE_URL, format_url) + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + if not formats: + return + self._sort_formats(formats) + return { + 'title': 
title, + 'formats': formats, + } + + thumbnail = urljoin(self._LIVE_URL, config.get('image')) + + info = extract_entry(config) + if info: + info.update({ + 'id': video_id, + 'thumbnail': thumbnail, + }) + return info + + entries = [] + for num, chapter in enumerate(config['chapters'], start=1): + entry = extract_entry(chapter) + entry['id'] = '%s-%d' % (video_id, num) + entries.append(entry) + + return self.playlist_result(entries, video_id, config.get('title')) diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py new file mode 100644 index 000000000..e435c28e1 --- /dev/null +++ b/youtube_dl/extractor/phoenix.py @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +from .dreisat import DreiSatIE + + +class PhoenixIE(DreiSatIE): + IE_NAME = 'phoenix.de' + _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/ + (?: + phoenix/die_sendungen/(?:[^/]+/)? + )? + (?P<id>[0-9]+)''' + _TESTS = [ + { + 'url': 'http://www.phoenix.de/content/884301', + 'md5': 'ed249f045256150c92e72dbb70eadec6', + 'info_dict': { + 'id': '884301', + 'ext': 'mp4', + 'title': 'Michael Krons mit Hans-Werner Sinn', + 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr', + 'upload_date': '20141025', + 'uploader': 'Im Dialog', + } + }, + { + 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815', + 'only_matching': True, + }, + { + 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234', + 'only_matching': True, + }, + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + internal_id = self._search_regex( + r'<div class="phx_vod" id="phx_vod_([0-9]+)"', + webpage, 'internal video ID') + + api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id + return self.extract_from_xml_url(video_id, api_url) diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py new file mode 100644 index 000000000..6c8bbe1d9 --- /dev/null +++ b/youtube_dl/extractor/photobucket.py @@ -0,0 +1,46 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class PhotobucketIE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))' + _TEST = { + 'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0', + 'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99', + 'info_dict': { + 'id': 'zpsc0c3b9fa', + 'ext': 'mp4', + 'timestamp': 1367669341, + 'upload_date': '20130504', + 'uploader': 'rachaneronas', + 'title': 'Tired of Link Building? 
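PhilharmonieDeParisIE plays the top-level config directly when it is itself a valid source, and otherwise falls back to a playlist built from its chapters. A standalone sketch of that dispatch, with `extract_entry` stubbed out; note the sketch also skips chapters for which `extract_entry` returns nothing, a case the code above would raise on:

```python
# Single-video-or-playlist dispatch, mirroring
# PhilharmonieDeParisIE._real_extract. extract_entry() is a stub here:
# it returns a dict for playable sources and None otherwise.
def extract_entry(source):
    if isinstance(source, dict) and source.get('title') and isinstance(source.get('files'), dict):
        return {'title': source['title']}

def build_result(config, video_id):
    info = extract_entry(config)
    if info:  # the config itself is playable -> single video
        info['id'] = video_id
        return info
    # otherwise build a playlist with one entry per chapter
    entries = []
    for num, chapter in enumerate(config.get('chapters') or [], start=1):
        entry = extract_entry(chapter)
        if not entry:  # defensive guard for unplayable chapters
            continue
        entry['id'] = '%s-%d' % (video_id, num)
        entries.append(entry)
    return {'_type': 'playlist', 'id': video_id, 'entries': entries}
```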
Try BacklinkMyDomain.com!', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + video_extension = mobj.group('ext') + + webpage = self._download_webpage(url, video_id) + + # Extract URL, uploader, and title from webpage + self.report_extraction(video_id) + info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', + webpage, 'info json') + info = json.loads(info_json) + url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) + return { + 'id': video_id, + 'url': url, + 'uploader': info['username'], + 'timestamp': info['creationDate'], + 'title': info['title'], + 'ext': video_extension, + 'thumbnail': info['thumbUrl'], + } diff --git a/youtube_dl/extractor/picarto.py b/youtube_dl/extractor/picarto.py new file mode 100644 index 000000000..8099ef1d6 --- /dev/null +++ b/youtube_dl/extractor/picarto.py @@ -0,0 +1,153 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import time + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + js_to_json, + try_get, + update_url_query, + urlencode_postdata, +) + + +class PicartoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?' + _TEST = { + 'url': 'https://picarto.tv/Setz', + 'info_dict': { + 'id': 'Setz', + 'ext': 'mp4', + 'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'timestamp': int, + 'is_live': True + }, + 'skip': 'Stream is offline', + } + + @classmethod + def suitable(cls, url): + return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + + metadata = self._download_json( + 'https://api.picarto.tv/v1/channel/name/' + channel_id, + channel_id) + + if metadata.get('online') is False: + raise ExtractorError('Stream is offline', expected=True) + + cdn_data = self._download_json( + 'https://picarto.tv/process/channel', channel_id, + data=urlencode_postdata({'loadbalancinginfo': channel_id}), + note='Downloading load balancing info') + + token = mobj.group('token') or 'public' + params = { + 'con': int(time.time() * 1000), + 'token': token, + } + + prefered_edge = cdn_data.get('preferedEdge') + formats = [] + + for edge in cdn_data['edges']: + edge_ep = edge.get('ep') + if not edge_ep or not isinstance(edge_ep, compat_str): + continue + edge_id = edge.get('id') + for tech in cdn_data['techs']: + tech_label = tech.get('label') + tech_type = tech.get('type') + preference = 0 + if edge_id == prefered_edge: + preference += 1 + format_id = [] + if edge_id: + format_id.append(edge_id) + if tech_type == 'application/x-mpegurl' or tech_label == 'HLS': + format_id.append('hls') + formats.extend(self._extract_m3u8_formats( + update_url_query( + 'https://%s/hls/%s/index.m3u8' + % (edge_ep, channel_id), params), + channel_id, 'mp4', preference=preference, + m3u8_id='-'.join(format_id), fatal=False)) + continue + elif tech_type == 'video/mp4' or tech_label == 'MP4': + format_id.append('mp4') + formats.append({ + 'url': update_url_query( + 'https://%s/mp4/%s.mp4' % (edge_ep, channel_id), + params), + 'format_id': '-'.join(format_id), + 'preference': preference, + }) + else: + # rtmp format does not seem to work + continue + self._sort_formats(formats) + + mature = metadata.get('adult') + if mature is None: + age_limit = 
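The Picarto edge URLs are built by appending the connection timestamp and access token as query parameters via `update_url_query`. The same transformation with only the standard library; the edge host and parameter values below are placeholders:

```python
# Standard-library equivalent of the update_url_query() call used for
# the Picarto HLS/MP4 edge URLs above. Values are illustrative.
import time
try:
    from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
except ImportError:  # Python 2
    from urllib import urlencode
    from urlparse import parse_qsl, urlparse, urlunparse

def with_query(url, params):
    parsed = urlparse(url)
    query = dict(parse_qsl(parsed.query))
    query.update(params)
    return urlunparse(parsed._replace(query=urlencode(query)))

edge_url = 'https://edge1.example.com/hls/somechannel/index.m3u8'
print(with_query(edge_url, {'con': int(time.time() * 1000), 'token': 'public'}))
```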
None + else: + age_limit = 18 if mature is True else 0 + + return { + 'id': channel_id, + 'title': self._live_title(metadata.get('title') or channel_id), + 'is_live': True, + 'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']), + 'channel': channel_id, + 'channel_url': 'https://picarto.tv/%s' % channel_id, + 'age_limit': age_limit, + 'formats': formats, + } + + +class PicartoVodIE(InfoExtractor): + _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv', + 'md5': '3ab45ba4352c52ee841a28fb73f2d9ca', + 'info_dict': { + 'id': 'ArtofZod_2017.12.12.00.13.23.flv', + 'ext': 'mp4', + 'title': 'ArtofZod_2017.12.12.00.13.23.flv', + 'thumbnail': r're:^https?://.*\.jpg' + }, + }, { + 'url': 'https://picarto.tv/videopopout/Plague', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + vod_info = self._parse_json( + self._search_regex( + r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage, + video_id), + video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats( + vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_id, + 'thumbnail': vod_info.get('vodThumb'), + 'formats': formats, + } diff --git a/youtube_dl/extractor/piksel.py b/youtube_dl/extractor/piksel.py new file mode 100644 index 000000000..c0c276a50 --- /dev/null +++ b/youtube_dl/extractor/piksel.py @@ -0,0 +1,123 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + dict_get, + int_or_none, + unescapeHTML, + parse_iso8601, +) + + +class PikselIE(InfoExtractor): + _VALID_URL = r'https?://player\.piksel\.com/v/(?P<id>[a-z0-9]+)' + _TESTS = [ + { + 'url': 'http://player.piksel.com/v/nv60p12f', + 'md5': 'd9c17bbe9c3386344f9cfd32fad8d235', + 'info_dict': { + 'id': 'nv60p12f', + 'ext': 'mp4', + 'title': 'فن الحياة - الحلقة 1', + 'description': 'احدث برامج الداعية الاسلامي " مصطفي حسني " فى رمضان 2016علي النهار نور', + 'timestamp': 1465231790, + 'upload_date': '20160606', + } + }, + { + # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al + 'url': 'https://player.piksel.com/v/v80kqp41', + 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', + 'info_dict': { + 'id': 'v80kqp41', + 'ext': 'mp4', + 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', + 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. 
Robart presiding.', + 'timestamp': 1486171129, + 'upload_date': '20170204', + } + } + ] + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.com/v/[a-z0-9]+)', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + app_token = self._search_regex([ + r'clientAPI\s*:\s*"([^"]+)"', + r'data-de-api-key\s*=\s*"([^"]+)"' + ], webpage, 'app token') + response = self._download_json( + 'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token, + video_id, query={ + 'v': video_id + })['response'] + failure = response.get('failure') + if failure: + raise ExtractorError(response['failure']['reason'], expected=True) + video_data = response['WsProgramResponse']['program']['asset'] + title = video_data['title'] + + formats = [] + + m3u8_url = dict_get(video_data, [ + 'm3u8iPadURL', + 'ipadM3u8Url', + 'm3u8AndroidURL', + 'm3u8iPhoneURL', + 'iphoneM3u8Url']) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + + asset_type = dict_get(video_data, ['assetType', 'asset_type']) + for asset_file in video_data.get('assetFiles', []): + # TODO: extract rtmp formats + http_url = asset_file.get('http_url') + if not http_url: + continue + tbr = None + vbr = int_or_none(asset_file.get('videoBitrate'), 1024) + abr = int_or_none(asset_file.get('audioBitrate'), 1024) + if asset_type == 'video': + tbr = vbr + abr + elif asset_type == 'audio': + tbr = abr + + format_id = ['http'] + if tbr: + format_id.append(compat_str(tbr)) + + formats.append({ + 'format_id': '-'.join(format_id), + 'url': unescapeHTML(http_url), + 'vbr': vbr, + 'abr': abr, + 'width': int_or_none(asset_file.get('videoWidth')), + 'height': int_or_none(asset_file.get('videoHeight')), + 'filesize': int_or_none(asset_file.get('filesize')), + 'tbr': tbr, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video_data.get('description'), + 'thumbnail': video_data.get('thumbnailUrl'), + 'timestamp': parse_iso8601(video_data.get('dateadd')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dl/extractor/pinkbike.py new file mode 100644 index 000000000..9f3501f77 --- /dev/null +++ b/youtube_dl/extractor/pinkbike.py @@ -0,0 +1,97 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_end, + remove_start, + str_to_int, + unified_strdate, +) + + +class PinkbikeIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.pinkbike.com/video/402811/', + 'md5': '4814b8ca7651034cd87e3361d5c2155a', + 'info_dict': { + 'id': '402811', + 'ext': 'mp4', + 'title': 'Brandon Semenuk - RAW 100', + 'description': 'Official release: www.redbull.ca/rupertwalker', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 100, + 'upload_date': '20150406', + 'uploader': 'revelco', + 'location': 'Victoria, British Columbia, Canada', + 'view_count': int, + 'comment_count': int, + } + }, { + 'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 
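On the Piksel bitrate handling: the API reports `videoBitrate`/`audioBitrate` in bits per second, and `int_or_none(value, 1024)` scales them down to kbit/s before `tbr` is derived as `vbr + abr` for video assets. A minimal re-implementation of the scaling behaviour this relies on (`youtube_dl.utils.int_or_none` is the full version):

```python
# Simplified int_or_none(v, scale): parse an int and divide by scale,
# returning None for missing/unparsable input.
def int_or_none(v, scale=1):
    if v is None:
        return None
    try:
        return int(v) // scale
    except (TypeError, ValueError):
        return None

vbr = int_or_none('2048000', 1024)  # -> 2000 kbit/s video
abr = int_or_none('131072', 1024)   # -> 128 kbit/s audio
tbr = (vbr or 0) + (abr or 0)       # total bitrate for 'video' assets
print(vbr, abr, tbr)                # 2000 128 2128
```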
'http://www.pinkbike.com/video/%s' % video_id, video_id) + + formats = [] + for _, format_id, src in re.findall( + r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage): + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', format_id, 'height', default=None)) + formats.append({ + 'url': src, + 'format_id': format_id, + 'height': height, + }) + self._sort_formats(formats) + + title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike') + description = self._html_search_regex( + r'(?s)id="media-description"[^>]*>(.+?)<', + webpage, 'description', default=None) or remove_start( + self._og_search_description(webpage), title + '. ') + thumbnail = self._og_search_thumbnail(webpage) + duration = int_or_none(self._html_search_meta( + 'video:duration', webpage, 'duration')) + + uploader = self._search_regex( + r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage, + 'uploader', fatal=False) + upload_date = unified_strdate(self._search_regex( + r'class="fullTime"[^>]+title="([^"]+)"', + webpage, 'upload date', fatal=False)) + + location = self._html_search_regex( + r'(?s)<dt>Location</dt>\s*<dd>(.+?)<', + webpage, 'location', fatal=False) + + def extract_count(webpage, label): + return str_to_int(self._search_regex( + r'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>%s' % label, + webpage, label, fatal=False)) + + view_count = extract_count(webpage, 'Views') + comment_count = extract_count(webpage, 'Comments') + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'upload_date': upload_date, + 'uploader': uploader, + 'location': location, + 'view_count': view_count, + 'comment_count': comment_count, + 'formats': formats + } diff --git a/youtube_dl/extractor/pladform.py b/youtube_dl/extractor/pladform.py new file mode 100644 index 000000000..e86c65396 --- /dev/null +++ b/youtube_dl/extractor/pladform.py @@ -0,0 +1,125 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + xpath_text, + qualities, +) + + +class PladformIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?: + out\.pladform\.ru/player| + static\.pladform\.ru/player\.swf + ) + \?.*\bvideoid=| + video\.pladform\.ru/catalog/video/videoid/ + ) + (?P<id>\d+) + ''' + _TESTS = [{ + 'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0', + 'md5': '53362fac3a27352da20fa2803cc5cd6f', + 'info_dict': { + 'id': '3777899', + 'ext': 'mp4', + 'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко', + 'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 3190, + }, + }, { + 'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0', + 'only_matching': True, + }, { + 'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0', + 'only_matching': True, + }] + + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + pl = qs.get('pl', ['1'])[0] + + video = self._download_xml( + 
'http://out.pladform.ru/getVideo', video_id, query={ + 'pl': pl, + 'videoid': video_id, + }) + + def fail(text): + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, text), + expected=True) + + if video.tag == 'error': + fail(video.text) + + quality = qualities(('ld', 'sd', 'hd')) + + formats = [] + for src in video.findall('./src'): + if src is None: + continue + format_url = src.text + if not format_url: + continue + if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src.text, + 'format_id': src.get('quality'), + 'quality': quality(src.get('quality')), + }) + + if not formats: + error = xpath_text(video, './cap', 'error', default=None) + if error: + fail(error) + + self._sort_formats(formats) + + webpage = self._download_webpage( + 'http://video.pladform.ru/catalog/video/videoid/%s' % video_id, + video_id) + + title = self._og_search_title(webpage, fatal=False) or xpath_text( + video, './/title', 'title', fatal=True) + description = self._search_regex( + r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) or xpath_text( + video, './/cover', 'cover') + + duration = int_or_none(xpath_text(video, './/time', 'duration')) + age_limit = int_or_none(xpath_text(video, './/age18', 'age limit')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py new file mode 100644 index 000000000..557b2b5ad --- /dev/null +++ b/youtube_dl/extractor/platzi.py @@ -0,0 +1,217 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_str, +) +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + str_or_none, + try_get, + url_or_none, + urlencode_postdata, + urljoin, +) + + +class PlatziIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/[^/]+/(?P<id>\d+)-[^/?\#&]+ + ''' + _LOGIN_URL = 'https://platzi.com/login/' + _NETRC_MACHINE = 'platzi' + + _TESTS = [{ + 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', + 'md5': '8f56448241005b561c10f11a595b37e3', + 'info_dict': { + 'id': '12074', + 'ext': 'mp4', + 'title': 'Creando nuestra primera página', + 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', + 'duration': 420, + }, + 'skip': 'Requires platzi account credentials', + }, { + 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', + 'info_dict': { + 'id': '13430', + 'ext': 'mp4', + 'title': 'Background', + 'description': 'md5:49c83c09404b15e6e71defaf87f6b305', + 'duration': 360, + }, + 'skip': 'Requires platzi account credentials', + 'params': { + 'skip_download': True, + }, + }] + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'email': username, + 'password': password, + }) + + urlh = self._request_webpage( + 
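The `quality = qualities(('ld', 'sd', 'hd'))` call in PladformIE returns a ranking function: known quality ids map to their index in the tuple, unknown ones to -1, which `_sort_formats` then uses to order formats. This mirrors the helper in `youtube_dl/utils.py`:

```python
# The qualities() helper, as used for Pladform's ld/sd/hd sources.
def qualities(quality_ids):
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return q

quality = qualities(('ld', 'sd', 'hd'))
print(quality('hd'), quality('ld'), quality('4k'))  # 2 0 -1
```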
self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Referer': self._LOGIN_URL}) + + # login succeeded + if 'platzi.com/login' not in compat_str(urlh.geturl()): + return + + login_error = self._webpage_read_content( + urlh, self._LOGIN_URL, None, 'Downloading login error page') + + login = self._parse_json( + self._search_regex( + r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'), + None) + + for kind in ('error', 'password', 'nonFields'): + error = str_or_none(login.get('%sError' % kind)) + if error: + raise ExtractorError( + 'Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + def _real_extract(self, url): + lecture_id = self._match_id(url) + + webpage = self._download_webpage(url, lecture_id) + + data = self._parse_json( + self._search_regex( + r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'), + lecture_id) + + material = data['initialState']['material'] + desc = material['description'] + title = desc['title'] + + formats = [] + for server_id, server in material['videos'].items(): + if not isinstance(server, dict): + continue + for format_id in ('hls', 'dash'): + format_url = url_or_none(server.get(format_id)) + if not format_url: + continue + if format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, lecture_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id=format_id, + note='Downloading %s m3u8 information' % server_id, + fatal=False)) + elif format_id == 'dash': + formats.extend(self._extract_mpd_formats( + format_url, lecture_id, mpd_id=format_id, + note='Downloading %s MPD manifest' % server_id, + fatal=False)) + self._sort_formats(formats) + + content = str_or_none(desc.get('content')) + description = (clean_html(compat_b64decode(content).decode('utf-8')) + if content else None) + duration = int_or_none(material.get('duration'), invscale=60) + + return { + 'id': lecture_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + } + + +class PlatziCourseIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + platzi\.com/clases| # es version + courses\.platzi\.com/classes # en version + )/(?P<id>[^/?\#&]+) + ''' + _TESTS = [{ + 'url': 'https://platzi.com/clases/next-js/', + 'info_dict': { + 'id': '1311', + 'title': 'Curso de Next.js', + }, + 'playlist_count': 22, + }, { + 'url': 'https://courses.platzi.com/classes/communication-codestream/', + 'info_dict': { + 'id': '1367', + 'title': 'Codestream Course', + }, + 'playlist_count': 14, + }] + + @classmethod + def suitable(cls, url): + return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_name = self._match_id(url) + + webpage = self._download_webpage(url, course_name) + + props = self._parse_json( + self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'), + course_name)['initialProps'] + + entries = [] + for chapter_num, chapter in enumerate(props['concepts'], 1): + if not isinstance(chapter, dict): + continue + materials = chapter.get('materials') + if not materials or not isinstance(materials, list): + continue + chapter_title = chapter.get('title') + chapter_id = str_or_none(chapter.get('id')) + for material in materials: + if not isinstance(material, dict): + continue + if material.get('material_type') != 'video': + continue + video_url = urljoin(url, material.get('url')) + if not video_url: + continue + entries.append({ + '_type': 'url_transparent', + 'url': video_url, + 'title': 
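Platzi's login is validated by inspecting the final URL after the POST: if the opener was redirected away from the login page, the session is good; otherwise the error page is parsed for the failure reason. A standard-library sketch of the redirect check only; the URL and field names are placeholders, not Platzi's actual endpoint:

```python
# Redirect-based login check, as in PlatziIE._login. Placeholders only.
try:
    from urllib.parse import urlencode
    from urllib.request import HTTPCookieProcessor, build_opener
except ImportError:  # Python 2
    from urllib import urlencode
    from urllib2 import HTTPCookieProcessor, build_opener

LOGIN_URL = 'https://example.com/login/'  # hypothetical

def logged_in(email, password):
    opener = build_opener(HTTPCookieProcessor())
    data = urlencode({'email': email, 'password': password}).encode()
    response = opener.open(LOGIN_URL, data)
    # a successful login redirects away from the login page
    return 'example.com/login' not in response.geturl()
```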
str_or_none(material.get('name')), + 'id': str_or_none(material.get('id')), + 'ie_key': PlatziIE.ie_key(), + 'chapter': chapter_title, + 'chapter_number': chapter_num, + 'chapter_id': chapter_id, + }) + + course_id = compat_str(try_get(props, lambda x: x['course']['id'])) + course_title = try_get(props, lambda x: x['course']['name'], compat_str) + + return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/playfm.py b/youtube_dl/extractor/playfm.py new file mode 100644 index 000000000..e766ccca3 --- /dev/null +++ b/youtube_dl/extractor/playfm.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, +) + + +class PlayFMIE(InfoExtractor): + IE_NAME = 'play.fm' + _VALID_URL = r'https?://(?:www\.)?play\.fm/(?P<slug>(?:[^/]+/)+(?P<id>[^/]+))/?(?:$|[?#])' + + _TEST = { + 'url': 'https://www.play.fm/dan-drastic/sven-tasnadi-leipzig-electronic-music-batofar-paris-fr-2014-07-12', + 'md5': 'c505f8307825a245d0c7ad1850001f22', + 'info_dict': { + 'id': '71276', + 'ext': 'mp3', + 'title': 'Sven Tasnadi - LEIPZIG ELECTRONIC MUSIC @ Batofar (Paris,FR) - 2014-07-12', + 'description': '', + 'duration': 5627, + 'timestamp': 1406033781, + 'upload_date': '20140722', + 'uploader': 'Dan Drastic', + 'uploader_id': '71170', + 'view_count': int, + 'comment_count': int, + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + slug = mobj.group('slug') + + recordings = self._download_json( + 'http://v2api.play.fm/recordings/slug/%s' % slug, video_id) + + error = recordings.get('error') + if isinstance(error, dict): + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, error.get('message')), + expected=True) + + audio_url = recordings['audio'] + video_id = compat_str(recordings.get('id') or video_id) + title = recordings['title'] + description = recordings.get('description') + duration = int_or_none(recordings.get('recordingDuration')) + timestamp = parse_iso8601(recordings.get('created_at')) + uploader = recordings.get('page', {}).get('title') + uploader_id = compat_str(recordings.get('page', {}).get('id')) + view_count = int_or_none(recordings.get('playCount')) + comment_count = int_or_none(recordings.get('commentCount')) + categories = [tag['name'] for tag in recordings.get('tags', []) if tag.get('name')] + + return { + 'id': video_id, + 'url': audio_url, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'view_count': view_count, + 'comment_count': comment_count, + 'categories': categories, + } diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dl/extractor/playplustv.py new file mode 100644 index 000000000..1e30ab23a --- /dev/null +++ b/youtube_dl/extractor/playplustv.py @@ -0,0 +1,109 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + clean_html, + ExtractorError, + int_or_none, + PUTRequest, +) + + +class PlayPlusTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})' + _TEST = { + 'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e', + 'md5': 'd078cb89d7ab6b9df37ce23c647aef72', + 'info_dict': { + 
'id': 'db8d274a5163424e967f35a30ddafb8e', + 'ext': 'mp4', + 'title': 'Capítulo 179 - Final', + 'description': 'md5:01085d62d8033a1e34121d3c3cabc838', + 'timestamp': 1529992740, + 'upload_date': '20180626', + }, + 'skip': 'Requires account credential', + } + _NETRC_MACHINE = 'playplustv' + _GEO_COUNTRIES = ['BR'] + _token = None + _profile_id = None + + def _call_api(self, resource, video_id=None, query=None): + return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={ + 'Authorization': 'Bearer ' + self._token, + }, query=query) + + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + self.raise_login_required() + + req = PUTRequest( + 'https://api.playplus.tv/api/web/login', json.dumps({ + 'email': email, + 'password': password, + }).encode(), { + 'Content-Type': 'application/json; charset=utf-8', + }) + + try: + self._token = self._download_json(req, None)['token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + raise ExtractorError(self._parse_json( + e.cause.read(), None)['errorMessage'], expected=True) + raise + + self._profile = self._call_api('Profiles')['list'][0]['_id'] + + def _real_extract(self, url): + project_id, media_id = re.match(self._VALID_URL, url).groups() + media = self._call_api( + 'Media', media_id, { + 'profileId': self._profile, + 'projectId': project_id, + 'mediaId': media_id, + })['obj'] + title = media['title'] + + formats = [] + for f in media.get('files', []): + f_url = f.get('url') + if not f_url: + continue + file_info = f.get('fileInfo') or {} + formats.append({ + 'url': f_url, + 'width': int_or_none(file_info.get('width')), + 'height': int_or_none(file_info.get('height')), + }) + self._sort_formats(formats) + + thumbnails = [] + for thumb in media.get('thumbs', []): + thumb_url = thumb.get('url') + if not thumb_url: + continue + thumbnails.append({ + 'url': thumb_url, + 'width': int_or_none(thumb.get('width')), + 'height': int_or_none(thumb.get('height')), + }) + + return { + 'id': media_id, + 'title': title, + 'formats': formats, + 'thumbnails': thumbnails, + 'description': clean_html(media.get('description')) or media.get('shortDescription'), + 'timestamp': int_or_none(media.get('publishDate'), 1000), + 'view_count': int_or_none(media.get('numberOfViews')), + 'comment_count': int_or_none(media.get('numberOfComments')), + 'tags': media.get('tags'), + } diff --git a/youtube_dl/extractor/plays.py b/youtube_dl/extractor/plays.py new file mode 100644 index 000000000..ddfc6f148 --- /dev/null +++ b/youtube_dl/extractor/plays.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PlaysTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?plays\.tv/(?:video|embeds)/(?P<id>[0-9a-f]{18})' + _TESTS = [{ + 'url': 'https://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', + 'md5': 'dfeac1198506652b5257a62762cec7bc', + 'info_dict': { + 'id': '56af17f56c95335490', + 'ext': 'mp4', + 'title': 'Bjergsen - When you outplay the Azir wall', + 'description': 'Posted by Bjergsen', + } + }, { + 'url': 'https://plays.tv/embeds/56af17f56c95335490', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'https://plays.tv/video/%s' % video_id, video_id) + + info = self._search_json_ld(webpage, video_id,) + + mpd_url, sources = 
re.search( + r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', + webpage).groups() + formats = self._extract_mpd_formats( + self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') + for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources): + formats.append({ + 'url': self._proto_relative_url(format_url), + 'format_id': 'http-' + format_id, + 'height': int_or_none(height), + }) + self._sort_formats(formats) + + info.update({ + 'id': video_id, + 'description': self._og_search_description(webpage), + 'thumbnail': info.get('thumbnail') or self._og_search_thumbnail(webpage), + 'formats': formats, + }) + + return info diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dl/extractor/playtvak.py new file mode 100644 index 000000000..4c5f57919 --- /dev/null +++ b/youtube_dl/extractor/playtvak.py @@ -0,0 +1,191 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_urlparse, + compat_urllib_parse_urlencode, +) +from ..utils import ( + ExtractorError, + int_or_none, + parse_iso8601, + qualities, +) + + +class PlaytvakIE(InfoExtractor): + IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz' + _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)' + _TESTS = [{ + 'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko', + 'md5': '4525ae312c324b4be2f4603cc78ceb4a', + 'info_dict': { + 'id': 'A150730_150323_hodinovy-manzel_kuko', + 'ext': 'mp4', + 'title': 'Vyžeňte vosy a sršně ze zahrady', + 'description': 'md5:4436e61b7df227a093778efb7e373571', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'duration': 279, + 'timestamp': 1438732860, + 'upload_date': '20150805', + 'is_live': False, + } + }, { # live video test + 'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat', + 'info_dict': { + 'id': 'A150624_164934_planespotting_cat', + 'ext': 'flv', + 'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze', + 'is_live': True, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + }, { # another live stream, this one without Misc.videoFLV + 'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap', + 'info_dict': { + 'id': 'A151218_145728_hlavni-nadrazi_plap', + 'ext': 'flv', + 'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + 'skip_download': True, # requires rtmpdump + }, + }, { # idnes.cz + 'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku', + 'md5': '819832ba33cd7016e58a6658577fe289', + 'info_dict': { + 'id': 'A150809_104116_domaci_pku', + 'ext': 'mp4', + 'title': 'Zavřeli jsme mraženou pizzu do auta. 
Upekla se', + 'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'duration': 39, + 'timestamp': 1438969140, + 'upload_date': '20150807', + 'is_live': False, + } + }, { # lidovky.cz + 'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE', + 'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8', + 'info_dict': { + 'id': 'A150808_214044_ln-video_ELE', + 'ext': 'mp4', + 'title': 'Táhni! Demonstrace proti imigrantům budila emoce', + 'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'timestamp': 1439052180, + 'upload_date': '20150808', + 'is_live': False, + } + }, { # metro.cz + 'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row', + 'md5': '84fc1deedcac37b7d4a6ccae7c716668', + 'info_dict': { + 'id': 'A141111_173251_metro-extra_row', + 'ext': 'mp4', + 'title': 'Recesisté udělali z billboardu kolotoč', + 'description': 'md5:7369926049588c3989a66c9c1a043c4c', + 'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$', + 'timestamp': 1415725500, + 'upload_date': '20141111', + 'is_live': False, + } + }, { + 'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + info_url = self._html_search_regex( + r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url') + + parsed_url = compat_urlparse.urlparse(info_url) + + qs = compat_urlparse.parse_qs(parsed_url.query) + qs.update({ + 'reklama': ['0'], + 'type': ['js'], + }) + + info_url = compat_urlparse.urlunparse( + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) + + json_info = self._download_json( + info_url, video_id, + transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) + + item = None + for i in json_info['items']: + if i.get('type') == 'video' or i.get('type') == 'stream': + item = i + break + if not item: + raise ExtractorError('No suitable stream found') + + quality = qualities(('low', 'middle', 'high')) + + formats = [] + for fmt in item['video']: + video_url = fmt.get('file') + if not video_url: + continue + + format_ = fmt['format'] + format_id = '%s_%s' % (format_, fmt['quality']) + preference = None + + if format_ in ('mp4', 'webm'): + ext = format_ + elif format_ == 'rtmp': + ext = 'flv' + elif format_ == 'apple': + ext = 'mp4' + # Some streams have mp3 audio which does not play + # well with ffmpeg filter aac_adtstoasc + preference = -1 + elif format_ == 'adobe': # f4m manifest fails with 404 in 80% of requests + continue + else: # Other formats not supported yet + continue + + formats.append({ + 'url': video_url, + 'ext': ext, + 'format_id': format_id, + 'quality': quality(fmt.get('quality')), + 'preference': preference, + }) + self._sort_formats(formats) + + title = item['title'] + is_live = item['type'] == 'stream' + if is_live: + title = self._live_title(title) + description = self._og_search_description(webpage, default=None) or self._html_search_meta( + 'description', webpage, 'description', default=None) + timestamp = None + duration = None + if not is_live: + duration = int_or_none(item.get('length')) + timestamp = item.get('published') + if timestamp: + timestamp = parse_iso8601(timestamp[:-5]) + + return { + 'id': video_id, + 'title': title, + 
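Playtvak's info URL is rewritten before download: its query string is parsed, `reklama=0` (advertising off) and `type=js` are merged in, and the URL is reassembled. The same round-trip in plain urllib terms; the info URL below is illustrative, not a real endpoint:

```python
# Query-string rewrite, as in PlaytvakIE._real_extract.
try:
    from urllib.parse import parse_qs, urlencode, urlparse, urlunparse
except ImportError:  # Python 2
    from urllib import urlencode
    from urlparse import parse_qs, urlparse, urlunparse

info_url = 'http://servix.example.cz/info?c=A150730_150323&type=xml'  # illustrative
parsed = urlparse(info_url)
qs = parse_qs(parsed.query)
qs.update({'reklama': ['0'], 'type': ['js']})  # disable ads, request JS payload
print(urlunparse(parsed._replace(query=urlencode(qs, doseq=True))))
```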
'description': description, + 'thumbnail': item.get('image'), + 'duration': duration, + 'timestamp': timestamp, + 'is_live': is_live, + 'formats': formats, + } diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py new file mode 100644 index 000000000..4aef186ea --- /dev/null +++ b/youtube_dl/extractor/playvid.py @@ -0,0 +1,99 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, +) +from ..utils import ( + clean_html, + ExtractorError, +) + + +class PlayvidIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' + _TESTS = [{ + 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', + 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', + 'info_dict': { + 'id': 'RnmBNgtrrJu', + 'ext': 'mp4', + 'title': 'md5:9256d01c6317e3f703848b5906880dc8', + 'duration': 82, + 'age_limit': 18, + }, + 'skip': 'Video removed due to ToS', + }, { + 'url': 'http://www.playvid.com/watch/hwb0GpNkzgH', + 'md5': '39d49df503ad7b8f23a4432cbf046477', + 'info_dict': { + 'id': 'hwb0GpNkzgH', + 'ext': 'mp4', + 'title': 'Ellen Euro Cutie Blond Takes a Sexy Survey Get Facial in The Park', + 'age_limit': 18, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + m_error = re.search( + r'<div class="block-error">\s*<div class="heading">\s*<div>(?P<msg>.+?)</div>\s*</div>', webpage) + if m_error: + raise ExtractorError(clean_html(m_error.group('msg')), expected=True) + + video_title = None + duration = None + video_thumbnail = None + formats = [] + + # most of the information is stored in the flashvars + flashvars = self._html_search_regex( + r'flashvars="(.+?)"', webpage, 'flashvars') + + infos = compat_urllib_parse_unquote(flashvars).split(r'&') + for info in infos: + videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) + if videovars_match: + key = videovars_match.group(1) + val = videovars_match.group(2) + + if key == 'title': + video_title = compat_urllib_parse_unquote_plus(val) + if key == 'duration': + try: + duration = int(val) + except ValueError: + pass + if key == 'big_thumb': + video_thumbnail = val + + videourl_match = re.match( + r'^video_urls\]\[(?P<resolution>[0-9]+)p', key) + if videourl_match: + height = int(videourl_match.group('resolution')) + formats.append({ + 'height': height, + 'url': val, + }) + self._sort_formats(formats) + + # Extract title - should be in the flashvars; if not, look elsewhere + if video_title is None: + video_title = self._html_search_regex( + r'<title>(.*?)</title', webpage, 'title') + + return { + 'id': video_id, + 'formats': formats, + 'title': video_title, + 'thumbnail': video_thumbnail, + 'duration': duration, + 'description': None, + 'age_limit': 18 + } diff --git a/youtube_dl/extractor/playwire.py b/youtube_dl/extractor/playwire.py new file mode 100644 index 000000000..4d96a10a7 --- /dev/null +++ b/youtube_dl/extractor/playwire.py @@ -0,0 +1,75 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + dict_get, + float_or_none, +) + + +class PlaywireIE(InfoExtractor): + _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json', + 'md5': 
'e6398701e3595888125729eaa2329ed9', + 'info_dict': { + 'id': '3353705', + 'ext': 'mp4', + 'title': 'S04_RM_UCL_Rus', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 145.94, + }, + }, { + # m3u8 in f4m + 'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json', + 'info_dict': { + 'id': '4840492', + 'ext': 'mp4', + 'title': 'ITV EL SHOW FULL', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # Multiple resolutions while bitrates missing + 'url': 'http://cdn.playwire.com/11625/embed/85228.html', + 'only_matching': True, + }, { + 'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json', + 'only_matching': True, + }, { + 'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id') + + player = self._download_json( + 'http://config.playwire.com/%s/videos/v2/%s/zeus.json' % (publisher_id, video_id), + video_id) + + title = player['settings']['title'] + duration = float_or_none(player.get('duration'), 1000) + + content = player['content'] + thumbnail = content.get('poster') + src = content['media']['f4m'] + + formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls') + for a_format in formats: + if not dict_get(a_format, ['tbr', 'width', 'height']): + a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0 + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dl/extractor/pluralsight.py new file mode 100644 index 000000000..abd08bc28 --- /dev/null +++ b/youtube_dl/extractor/pluralsight.py @@ -0,0 +1,501 @@ +from __future__ import unicode_literals + +import collections +import json +import os +import random +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urlparse, +) +from ..utils import ( + dict_get, + ExtractorError, + float_or_none, + int_or_none, + parse_duration, + qualities, + srt_subtitles_timecode, + try_get, + update_url_query, + urlencode_postdata, +) + + +class PluralsightBaseIE(InfoExtractor): + _API_BASE = 'https://app.pluralsight.com' + + _GRAPHQL_EP = '%s/player/api/graphql' % _API_BASE + _GRAPHQL_HEADERS = { + 'Content-Type': 'application/json;charset=UTF-8', + } + _GRAPHQL_COURSE_TMPL = ''' +query BootstrapPlayer { + rpc { + bootstrapPlayer { + profile { + firstName + lastName + email + username + userHandle + authed + isAuthed + plan + } + course(courseId: "%s") { + name + title + courseHasCaptions + translationLanguages { + code + name + } + supportsWideScreenVideoFormats + timestamp + modules { + name + title + duration + formattedDuration + author + authorized + clips { + authorized + clipId + duration + formattedDuration + id + index + moduleIndex + moduleTitle + name + title + watched + } + } + } + } + } +}''' + + def _download_course(self, course_id, url, display_id): + try: + return self._download_course_rpc(course_id, url, display_id) + except ExtractorError: + # Old API fallback + return self._download_json( + 'https://app.pluralsight.com/player/user/api/v1/player/payload', + display_id, data=urlencode_postdata({'courseId': course_id}), + headers={'Referer': url}) + + def _download_course_rpc(self, course_id, url, display_id): + response = self._download_json( + self._GRAPHQL_EP, display_id, data=json.dumps({ + 'query': 
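Both Pluralsight endpoints below speak GraphQL over plain POST: a JSON body carrying `query` and `variables`, sent with an explicit JSON content type. A minimal sketch of that call shape; the endpoint and query are placeholders rather than Pluralsight's real schema:

```python
# Bare-bones GraphQL POST, matching the request shape used by
# _download_course_rpc below. Endpoint/query are hypothetical.
import json
try:
    from urllib.request import Request, urlopen
except ImportError:  # Python 2
    from urllib2 import Request, urlopen

def graphql(endpoint, query, variables=None):
    body = json.dumps({'query': query, 'variables': variables or {}}).encode('utf-8')
    request = Request(endpoint, body, {'Content-Type': 'application/json;charset=UTF-8'})
    return json.loads(urlopen(request).read().decode('utf-8'))

# usage (hypothetical endpoint and query):
# data = graphql('https://example.com/player/api/graphql',
#                'query Q { course(courseId: "x") { name } }')
```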
self._GRAPHQL_COURSE_TMPL % course_id, + 'variables': {} + }).encode('utf-8'), headers=self._GRAPHQL_HEADERS) + + course = try_get( + response, lambda x: x['data']['rpc']['bootstrapPlayer']['course'], + dict) + if course: + return course + + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, response['error']['message']), + expected=True) + + +class PluralsightIE(PluralsightBaseIE): + IE_NAME = 'pluralsight' + _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?' + _LOGIN_URL = 'https://app.pluralsight.com/id/' + + _NETRC_MACHINE = 'pluralsight' + + _TESTS = [{ + 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', + 'md5': '4d458cf5cf4c593788672419a8dd4cf8', + 'info_dict': { + 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', + 'ext': 'mp4', + 'title': 'Demo Monitoring', + 'duration': 338, + }, + 'skip': 'Requires pluralsight account credentials', + }, { + 'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live', + 'only_matching': True, + }, { + # available without pluralsight account + 'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started', + 'only_matching': True, + }, { + 'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0', + 'only_matching': True, + }] + + GRAPHQL_VIEWCLIP_TMPL = ''' +query viewClip { + viewClip(input: { + author: "%(author)s", + clipIndex: %(clipIndex)d, + courseName: "%(courseName)s", + includeCaptions: %(includeCaptions)s, + locale: "%(locale)s", + mediaType: "%(mediaType)s", + moduleName: "%(moduleName)s", + quality: "%(quality)s" + }) { + urls { + url + cdn + rank + source + }, + status + } +}''' + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'Username': username, + 'Password': password, + }) + + post_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, + 'post url', default=self._LOGIN_URL, group='url') + + if not post_url.startswith('http'): + post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) + + response = self._download_webpage( + post_url, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + error = self._search_regex( + r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + + if all(not re.search(p, response) for p in ( + r'__INITIAL_STATE__', r'["\']currentUser["\']', + # new layout? + r'>\s*Sign out\s*<')): + BLOCKED = 'Your account has been blocked due to suspicious activity' + if BLOCKED in response: + raise ExtractorError( + 'Unable to login: %s' % BLOCKED, expected=True) + MUST_AGREE = 'To continue using Pluralsight, you must agree to' + if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')): + raise ExtractorError( + 'Unable to login: %s some documents. 
Go to pluralsight.com, '
+                    'log in and agree with what Pluralsight requires.'
+                    % MUST_AGREE, expected=True)
+
+            raise ExtractorError('Unable to log in')
+
+    def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id):
+        captions = None
+        if clip_id:
+            captions = self._download_json(
+                '%s/transcript/api/v1/caption/json/%s/%s'
+                % (self._API_BASE, clip_id, lang), video_id,
+                'Downloading captions JSON', 'Unable to download captions JSON',
+                fatal=False)
+        if not captions:
+            captions_post = {
+                'a': author,
+                'cn': int(clip_idx),
+                'lc': lang,
+                'm': name,
+            }
+            captions = self._download_json(
+                '%s/player/retrieve-captions' % self._API_BASE, video_id,
+                'Downloading captions JSON', 'Unable to download captions JSON',
+                fatal=False, data=json.dumps(captions_post).encode('utf-8'),
+                headers={'Content-Type': 'application/json;charset=utf-8'})
+        if captions:
+            return {
+                lang: [{
+                    'ext': 'json',
+                    'data': json.dumps(captions),
+                }, {
+                    'ext': 'srt',
+                    'data': self._convert_subtitles(duration, captions),
+                }]
+            }
+
+    @staticmethod
+    def _convert_subtitles(duration, subs):
+        srt = ''
+        TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset')
+        TEXT_KEYS = ('text', 'Text')
+        for num, current in enumerate(subs):
+            start, text = (
+                float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)),
+                dict_get(current, TEXT_KEYS))
+            if start is None or text is None:
+                continue
+            end = duration if num == len(subs) - 1 else float_or_none(
+                dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False))
+            if end is None:
+                continue
+            srt += os.linesep.join(
+                (
+                    '%d' % num,
+                    '%s --> %s' % (
+                        srt_subtitles_timecode(start),
+                        srt_subtitles_timecode(end)),
+                    text,
+                    os.linesep,
+                ))
+        return srt
+
+    def _real_extract(self, url):
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+
+        author = qs.get('author', [None])[0]
+        name = qs.get('name', [None])[0]
+        clip_idx = qs.get('clip', [None])[0]
+        course_name = qs.get('course', [None])[0]
+
+        if any(not f for f in (author, name, clip_idx, course_name,)):
+            raise ExtractorError('Invalid URL', expected=True)
+
+        display_id = '%s-%s' % (name, clip_idx)
+
+        course = self._download_course(course_name, url, display_id)
+
+        collection = course['modules']
+
+        clip = None
+
+        for module_ in collection:
+            if name in (module_.get('moduleName'), module_.get('name')):
+                for clip_ in module_.get('clips', []):
+                    clip_index = clip_.get('clipIndex')
+                    if clip_index is None:
+                        clip_index = clip_.get('index')
+                    if clip_index is None:
+                        continue
+                    if compat_str(clip_index) == clip_idx:
+                        clip = clip_
+                        break
+
+        if not clip:
+            raise ExtractorError('Unable to resolve clip')
+
+        title = clip['title']
+        clip_id = clip.get('clipName') or clip.get('name') or clip['clipId']
+
+        QUALITIES = {
+            'low': {'width': 640, 'height': 480},
+            'medium': {'width': 848, 'height': 480},
+            'high': {'width': 1024, 'height': 768},
+            'high-widescreen': {'width': 1280, 'height': 720},
+        }
+
+        QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen',)
+        quality_key = qualities(QUALITIES_PREFERENCE)
+
+        AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities'])
+
+        ALLOWED_QUALITIES = (
+            AllowedQuality('webm', ['high', ]),
+            AllowedQuality('mp4', ['low', 'medium', 'high', ]),
+        )
+
+        # Some courses also offer widescreen resolution for high quality (see
+        # https://github.com/ytdl-org/youtube-dl/issues/7766)
+        widescreen = course.get('supportsWideScreenVideoFormats') is True
+
best_quality = 'high-widescreen' if widescreen else 'high' + if widescreen: + for allowed_quality in ALLOWED_QUALITIES: + allowed_quality.qualities.append(best_quality) + + # In order to minimize the number of calls to ViewClip API and reduce + # the probability of being throttled or banned by Pluralsight we will request + # only single format until formats listing was explicitly requested. + if self._downloader.params.get('listformats', False): + allowed_qualities = ALLOWED_QUALITIES + else: + def guess_allowed_qualities(): + req_format = self._downloader.params.get('format') or 'best' + req_format_split = req_format.split('-', 1) + if len(req_format_split) > 1: + req_ext, req_quality = req_format_split + req_quality = '-'.join(req_quality.split('-')[:2]) + for allowed_quality in ALLOWED_QUALITIES: + if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: + return (AllowedQuality(req_ext, (req_quality, )), ) + req_ext = 'webm' if self._downloader.params.get('prefer_free_formats') else 'mp4' + return (AllowedQuality(req_ext, (best_quality, )), ) + allowed_qualities = guess_allowed_qualities() + + formats = [] + for ext, qualities_ in allowed_qualities: + for quality in qualities_: + f = QUALITIES[quality].copy() + clip_post = { + 'author': author, + 'includeCaptions': 'false', + 'clipIndex': int(clip_idx), + 'courseName': course_name, + 'locale': 'en', + 'moduleName': name, + 'mediaType': ext, + 'quality': '%dx%d' % (f['width'], f['height']), + } + format_id = '%s-%s' % (ext, quality) + + try: + viewclip = self._download_json( + self._GRAPHQL_EP, display_id, + 'Downloading %s viewclip graphql' % format_id, + data=json.dumps({ + 'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post, + 'variables': {} + }).encode('utf-8'), + headers=self._GRAPHQL_HEADERS)['data']['viewClip'] + except ExtractorError: + # Still works but most likely will go soon + viewclip = self._download_json( + '%s/video/clips/viewclip' % self._API_BASE, display_id, + 'Downloading %s viewclip JSON' % format_id, fatal=False, + data=json.dumps(clip_post).encode('utf-8'), + headers={'Content-Type': 'application/json;charset=utf-8'}) + + # Pluralsight tracks multiple sequential calls to ViewClip API and start + # to return 429 HTTP errors after some time (see + # https://github.com/ytdl-org/youtube-dl/pull/6989). Moreover it may even lead + # to account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842). + # To somewhat reduce the probability of these consequences + # we will sleep random amount of time before each call to ViewClip. + self._sleep( + random.randint(2, 5), display_id, + '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling') + + if not viewclip: + continue + + clip_urls = viewclip.get('urls') + if not isinstance(clip_urls, list): + continue + + for clip_url_data in clip_urls: + clip_url = clip_url_data.get('url') + if not clip_url: + continue + cdn = clip_url_data.get('cdn') + clip_f = f.copy() + clip_f.update({ + 'url': clip_url, + 'ext': ext, + 'format_id': '%s-%s' % (format_id, cdn) if cdn else format_id, + 'quality': quality_key(quality), + 'source_preference': int_or_none(clip_url_data.get('rank')), + }) + formats.append(clip_f) + + self._sort_formats(formats) + + duration = int_or_none( + clip.get('duration')) or parse_duration(clip.get('formattedDuration')) + + # TODO: other languages? 
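The single-format optimisation above keeps calls to the ViewClip API to a minimum by inferring one (ext, quality) pair from the requested --format string, e.g. 'mp4-high' or 'webm-high-widescreen'. That parsing step in isolation, as a simplified sketch:

```python
# Simplified version of the --format parsing in guess_allowed_qualities.
def split_requested_format(req_format):
    parts = req_format.split('-', 1)
    if len(parts) == 1:
        return None  # e.g. 'best': no explicit ext/quality requested
    ext, quality = parts
    # keep at most two quality tokens, so 'high-widescreen' survives
    quality = '-'.join(quality.split('-')[:2])
    return ext, quality

print(split_requested_format('mp4-high'))              # ('mp4', 'high')
print(split_requested_format('webm-high-widescreen'))  # ('webm', 'high-widescreen')
print(split_requested_format('best'))                  # None
```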
+ subtitles = self.extract_subtitles( + author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id) + + return { + 'id': clip_id, + 'title': title, + 'duration': duration, + 'creator': author, + 'formats': formats, + 'subtitles': subtitles, + } + + +class PluralsightCourseIE(PluralsightBaseIE): + IE_NAME = 'pluralsight:course' + _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)' + _TESTS = [{ + # Free course from Pluralsight Starter Subscription for Microsoft TechNet + # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz + 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas', + 'info_dict': { + 'id': 'hosting-sql-server-windows-azure-iaas', + 'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals', + 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986', + }, + 'playlist_count': 31, + }, { + # available without pluralsight account + 'url': 'https://www.pluralsight.com/courses/angularjs-get-started', + 'only_matching': True, + }, { + 'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents', + 'only_matching': True, + }] + + def _real_extract(self, url): + course_id = self._match_id(url) + + # TODO: PSM cookie + + course = self._download_course(course_id, url, course_id) + + title = course['title'] + course_name = course['name'] + course_data = course['modules'] + description = course.get('description') or course.get('shortDescription') + + entries = [] + for num, module in enumerate(course_data, 1): + author = module.get('author') + module_name = module.get('name') + if not author or not module_name: + continue + for clip in module.get('clips', []): + clip_index = int_or_none(clip.get('index')) + if clip_index is None: + continue + clip_url = update_url_query( + '%s/player' % self._API_BASE, query={ + 'mode': 'live', + 'course': course_name, + 'author': author, + 'name': module_name, + 'clip': clip_index, + }) + entries.append({ + '_type': 'url_transparent', + 'url': clip_url, + 'ie_key': PluralsightIE.ie_key(), + 'chapter': module.get('title'), + 'chapter_number': num, + 'chapter_id': module.get('moduleRef'), + }) + + return self.playlist_result(entries, course_id, title, description) diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dl/extractor/podomatic.py new file mode 100644 index 000000000..e782e3f1f --- /dev/null +++ b/youtube_dl/extractor/podomatic.py @@ -0,0 +1,76 @@ +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PodomaticIE(InfoExtractor): + IE_NAME = 'podomatic' + _VALID_URL = r'''(?x) + (?P<proto>https?):// + (?: + (?P<channel>[^.]+)\.podomatic\.com/entry| + (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes + )/ + (?P<id>[^/?#&]+) + ''' + + _TESTS = [{ + 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', + 'md5': '84bb855fcf3429e6bf72460e1eed782d', + 'info_dict': { + 'id': '2009-01-02T16_03_35-08_00', + 'ext': 'mp3', + 'uploader': 'Science Teaching Tips', + 'uploader_id': 'scienceteachingtips', + 'title': '64. 
When the Moon Hits Your Eye', + 'duration': 446, + } + }, { + 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', + 'md5': 'd2cf443931b6148e27638650e2638297', + 'info_dict': { + 'id': '2013-11-15T16_31_21-08_00', + 'ext': 'mp3', + 'uploader': 'Ostbahnhof / Techno Mix', + 'uploader_id': 'ostbahnhof', + 'title': 'Einunddreizig', + 'duration': 3799, + } + }, { + 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + channel = mobj.group('channel') or mobj.group('channel_2') + + json_url = (('%s://%s.podomatic.com/entry/embed_params/%s' + + '?permalink=true&rtmp=0') % + (mobj.group('proto'), channel, video_id)) + data_json = self._download_webpage( + json_url, video_id, 'Downloading video info') + data = json.loads(data_json) + + video_url = data['downloadLink'] + if not video_url: + video_url = '%s/%s' % (data['streamer'].replace('rtmp', 'http'), data['mediaLocation']) + uploader = data['podcast'] + title = data['title'] + thumbnail = data['imageLocation'] + duration = int_or_none(data.get('length'), 1000) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'uploader': uploader, + 'uploader_id': channel, + 'thumbnail': thumbnail, + 'duration': duration, + } diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py new file mode 100644 index 000000000..dd5f17f11 --- /dev/null +++ b/youtube_dl/extractor/pokemon.py @@ -0,0 +1,75 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + int_or_none, +) + + +class PokemonIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/', + 'md5': '2fe8eaec69768b25ef898cda9c43062e', + 'info_dict': { + 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4', + 'ext': 'mp4', + 'title': 'The Ol’ Raise and Switch!', + 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', + 'timestamp': 1511824728, + 'upload_date': '20171127', + }, + 'add_id': ['LimelightMedia'], + }, { + # no data-video-title + 'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008', + 'info_dict': { + 'id': '99f3bae270bf4e5097274817239ce9c8', + 'ext': 'mp4', + 'title': 'Pokémon: The Rise of Darkrai', + 'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d', + 'timestamp': 1417778347, + 'upload_date': '20141205', + }, + 'add_id': ['LimelightMedia'], + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', + 'only_matching': True, + }, { + 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/', + 'only_matching': True, + }, { + 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id, display_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, video_id or display_id) + video_data = extract_attributes(self._search_regex( + r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'), + webpage, 'video data element')) + video_id = video_data['data-video-id'] + title = 
video_data.get('data-video-title') or self._html_search_meta( + 'pkm-title', webpage, ' title', default=None) or self._search_regex( + r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title') + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:%s' % video_id, + 'title': title, + 'description': video_data.get('data-video-summary'), + 'thumbnail': video_data.get('data-video-poster'), + 'series': 'Pokémon', + 'season_number': int_or_none(video_data.get('data-video-season')), + 'episode': title, + 'episode_number': int_or_none(video_data.get('data-video-episode')), + 'ie_key': 'LimelightMedia', + } diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py new file mode 100644 index 000000000..978d6f813 --- /dev/null +++ b/youtube_dl/extractor/polskieradio.py @@ -0,0 +1,180 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import itertools +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse_unquote, + compat_urlparse +) +from ..utils import ( + extract_attributes, + int_or_none, + strip_or_none, + unified_timestamp, +) + + +class PolskieRadioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', + 'info_dict': { + 'id': '1587943', + 'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie', + 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', + }, + 'playlist': [{ + 'md5': '2984ee6ce9046d91fc233bc1a864a09a', + 'info_dict': { + 'id': '1540576', + 'ext': 'mp3', + 'title': 'md5:d4623290d4ac983bf924061c75c23a0d', + 'timestamp': 1456594200, + 'upload_date': '20160227', + 'duration': 2364, + 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' + }, + }], + }, { + 'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal', + 'info_dict': { + 'id': '1635803', + 'title': 'Euro 2016: nie ma miejsca na błąd. 
Polacy grają ze Szwajcarią o ćwierćfinał', + 'description': 'md5:01cb7d0cad58664095d72b51a1ebada2', + }, + 'playlist_mincount': 12, + }, { + 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943', + 'only_matching': True, + }, { + # with mp4 video + 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + content = self._search_regex( + r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>', + webpage, 'content') + + timestamp = unified_timestamp(self._html_search_regex( + r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>', + webpage, 'timestamp', fatal=False)) + + thumbnail_url = self._og_search_thumbnail(webpage) + + entries = [] + + media_urls = set() + + for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content): + media = self._parse_json(data_media, playlist_id, fatal=False) + if not media.get('file') or not media.get('desc'): + continue + media_url = self._proto_relative_url(media['file'], 'http:') + if media_url in media_urls: + continue + media_urls.add(media_url) + entries.append({ + 'id': compat_str(media['id']), + 'url': media_url, + 'title': compat_urllib_parse_unquote(media['desc']), + 'duration': int_or_none(media.get('length')), + 'vcodec': 'none' if media.get('provider') == 'audio' else None, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url + }) + + title = self._og_search_title(webpage).strip() + description = strip_or_none(self._og_search_description(webpage)) + + return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioCategoryIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', + 'info_dict': { + 'id': '5102', + 'title': 'HISTORIA ŻYWA', + }, + 'playlist_mincount': 38, + }, { + 'url': 'http://www.polskieradio.pl/7/4807', + 'info_dict': { + 'id': '4807', + 'title': 'Vademecum 1050. 
rocznicy Chrztu Polski' + }, + 'playlist_mincount': 5 + }, { + 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', + 'only_matching': True + }, { + 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', + 'info_dict': { + 'id': '4143', + 'title': 'Kierunek Kraków', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', + 'info_dict': { + 'id': '214', + 'title': 'Muzyka', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) + + def _entries(self, url, page, category_id): + content = page + for page_num in itertools.count(2): + for a_entry, entry_id in re.findall( + r'(?s)<article[^>]+>.*?(<a[^>]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>', + content): + entry = extract_attributes(a_entry) + href = entry.get('href') + if not href: + continue + yield self.url_result( + compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), + entry_id, entry.get('title')) + mobj = re.search( + r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', + content) + if not mobj: + break + next_url = compat_urlparse.urljoin(url, mobj.group('url')) + content = self._download_webpage( + next_url, category_id, 'Downloading page %s' % page_num) + + def _real_extract(self, url): + category_id = self._match_id(url) + webpage = self._download_webpage(url, category_id) + title = self._html_search_regex( + r'<title>([^<]+) - [^<]+ - [^<]+', + webpage, 'title', fatal=False) + return self.playlist_result( + self._entries(url, webpage, category_id), + category_id, title) diff --git a/youtube_dl/extractor/popcorntv.py b/youtube_dl/extractor/popcorntv.py new file mode 100644 index 000000000..9f834fb6c --- /dev/null +++ b/youtube_dl/extractor/popcorntv.py @@ -0,0 +1,76 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + int_or_none, + unified_timestamp, +) + + +class PopcornTVIE(InfoExtractor): + _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P[^/]+)/(?P\d+)' + _TESTS = [{ + 'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183', + 'md5': '47d65a48d147caf692ab8562fe630b45', + 'info_dict': { + 'id': '9183', + 'display_id': 'food-wars-battaglie-culinarie-episodio-01', + 'ext': 'mp4', + 'title': 'Food Wars, Battaglie Culinarie | Episodio 01', + 'description': 'md5:b8bea378faae4651d3b34c6e112463d0', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1497610857, + 'upload_date': '20170616', + 'duration': 1440, + 'view_count': int, + }, + }, { + 'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id, video_id = mobj.group('display_id', 'id') + + webpage = self._download_webpage(url, display_id) + + m3u8_url = extract_attributes( + self._search_regex( + r'(]+itemprop=["\'](?:content|embed)Url[^>]*>)', + webpage, 'content' + ))['href'] + + formats = self._extract_m3u8_formats( + m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + + title = self._search_regex( + r']+itemprop=["\']name[^>]*>([^<]+)', webpage, + 
'title', default=None) or self._og_search_title(webpage) + + description = self._html_search_regex( + r'(?s)]+itemprop=["\']description[^>]*>(.+?)', + webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) + timestamp = unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp')) + duration = int_or_none(self._html_search_meta( + 'duration', webpage), invscale=60) + view_count = int_or_none(self._html_search_meta( + 'interactionCount', webpage, 'view count')) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py new file mode 100644 index 000000000..24c3600fe --- /dev/null +++ b/youtube_dl/extractor/porn91.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + int_or_none, + ExtractorError, +) + + +class Porn91IE(InfoExtractor): + IE_NAME = '91porn' + _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P[\w\d]+)' + + _TEST = { + 'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134', + 'md5': '7fcdb5349354f40d41689bd0fa8db05a', + 'info_dict': { + 'id': '7e42283b4f5ab36da134', + 'title': '18岁大一漂亮学妹,水嫩性感,再爽一次!', + 'ext': 'mp4', + 'duration': 431, + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + self._set_cookie('91porn.com', 'language', 'cn_CN') + + webpage = self._download_webpage( + 'http://91porn.com/view_video.php?viewkey=%s' % video_id, video_id) + + if '作为游客,你每天只可观看10个视频' in webpage: + raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True) + + title = self._search_regex( + r'
    <div id="viewvideo-title">([^<]+)</div>
    ', webpage, 'title') + title = title.replace('\n', '') + + info_dict = self._parse_html5_media_entries(url, webpage, video_id)[0] + + duration = parse_duration(self._search_regex( + r'时长:\s*\s*(\d+:\d+)', webpage, 'duration', fatal=False)) + + comment_count = int_or_none(self._search_regex( + r'留言:\s*\s*(\d+)', webpage, 'comment count', fatal=False)) + + info_dict.update({ + 'id': video_id, + 'title': title, + 'duration': duration, + 'comment_count': comment_count, + 'age_limit': self._rta_search(webpage), + }) + + return info_dict diff --git a/youtube_dl/extractor/porncom.py b/youtube_dl/extractor/porncom.py new file mode 100644 index 000000000..5726cab3a --- /dev/null +++ b/youtube_dl/extractor/porncom.py @@ -0,0 +1,103 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_urlparse +from ..utils import ( + int_or_none, + js_to_json, + parse_filesize, + str_to_int, +) + + +class PornComIE(InfoExtractor): + _VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P[^/]+)-)?(?P\d+)' + _TESTS = [{ + 'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339', + 'md5': '3f30ce76267533cd12ba999263156de7', + 'info_dict': { + 'id': '2603339', + 'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec', + 'ext': 'mp4', + 'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 551, + 'view_count': int, + 'age_limit': 18, + 'categories': list, + 'tags': list, + }, + }, { + 'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + config = self._parse_json( + self._search_regex( + (r'=\s*({.+?})\s*;\s*v1ar\b', + r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*='), + webpage, 'config', default='{}'), + display_id, transform_source=js_to_json, fatal=False) + + if config: + title = config['title'] + formats = [{ + 'url': stream['url'], + 'format_id': stream.get('id'), + 'height': int_or_none(self._search_regex( + r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None)) + } for stream in config['streams'] if stream.get('url')] + thumbnail = (compat_urlparse.urljoin( + config['thumbCDN'], config['poster']) + if config.get('thumbCDN') and config.get('poster') else None) + duration = int_or_none(config.get('length')) + else: + title = self._search_regex( + (r'([^<]+)', r']*>([^<]+)'), + webpage, 'title') + formats = [{ + 'url': compat_urlparse.urljoin(url, format_url), + 'format_id': '%sp' % height, + 'height': int(height), + 'filesize_approx': parse_filesize(filesize), + } for format_url, height, filesize in re.findall( + r']+href="(/download/[^"]+)">[^<]*?(\d+)p]*>(\d+\s*[a-zA-Z]+)<', + webpage)] + thumbnail = None + duration = None + + self._sort_formats(formats) + + view_count = str_to_int(self._search_regex( + (r'Views:\s*\s*\s*([\d,.]+)', + r'class=["\']views["\'][^>]*>
    <p>([\d,.]+)'), webpage, + 'view count', fatal=False)) + + def extract_list(kind): + s = self._search_regex( + (r'(?s)%s:\s*</span>\s*<span>(.+?)</span>' % kind.capitalize(), + r'(?s)<div[^>]*>%s:(.+?)</div>
    ' % kind.capitalize()), + webpage, kind, fatal=False) + return re.findall(r']+>([^<]+)', s or '') + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + 'age_limit': 18, + 'categories': extract_list('categories'), + 'tags': extract_list('tags'), + } diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py new file mode 100644 index 000000000..27d65d4b9 --- /dev/null +++ b/youtube_dl/extractor/pornhd.py @@ -0,0 +1,109 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + js_to_json, + urljoin, +) + + +class PornHdIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P\d+)(?:/(?P.+))?' + _TESTS = [{ + 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', + 'md5': '87f1540746c1d32ec7a2305c12b96b25', + 'info_dict': { + 'id': '9864', + 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', + 'ext': 'mp4', + 'title': 'Restroom selfie masturbation', + 'description': 'md5:3748420395e03e31ac96857a8f125b2b', + 'thumbnail': r're:^https?://.*\.jpg', + 'view_count': int, + 'like_count': int, + 'age_limit': 18, + } + }, { + # removed video + 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', + 'md5': '956b8ca569f7f4d8ec563e2c41598441', + 'info_dict': { + 'id': '1962', + 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video', + 'ext': 'mp4', + 'title': 'Sierra loves doing laundry', + 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', + 'thumbnail': r're:^https?://.*\.jpg', + 'view_count': int, + 'like_count': int, + 'age_limit': 18, + }, + 'skip': 'Not available anymore', + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id or video_id) + + title = self._html_search_regex( + [r']+class=["\']video-name["\'][^>]*>([^<]+)', + r'(.+?) 
- .*?[Pp]ornHD.*?'], webpage, 'title') + + sources = self._parse_json(js_to_json(self._search_regex( + r"(?s)sources'?\s*[:=]\s*(\{.+?\})", + webpage, 'sources', default='{}')), video_id) + + if not sources: + message = self._html_search_regex( + r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P.+?)]+class="description"[^>]*>(?P[^<]+)(?:(?!\1).)+)\1", webpage, + 'thumbnail', fatal=False, group='url') + + like_count = int_or_none(self._search_regex( + (r'(\d+)\s*]+>(?: |\s)*\blikes', + r'class=["\']save-count["\'][^>]*>\s*(\d+)'), + webpage, 'like count', fatal=False)) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'view_count': view_count, + 'like_count': like_count, + 'formats': formats, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py new file mode 100644 index 000000000..11b8cfcf7 --- /dev/null +++ b/youtube_dl/extractor/pornhub.py @@ -0,0 +1,582 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import functools +import itertools +import operator +import re + +from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_str, + compat_urllib_request, +) +from .openload import PhantomJSwrapper +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + orderedSet, + remove_quotes, + str_to_int, + url_or_none, +) + + +class PornHubBaseIE(InfoExtractor): + def _download_webpage_handle(self, *args, **kwargs): + def dl(*args, **kwargs): + return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) + + webpage, urlh = dl(*args, **kwargs) + + if any(re.search(p, webpage) for p in ( + r']+\bonload=["\']go\(\)', + r'document\.cookie\s*=\s*["\']RNKEY=', + r'document\.location\.reload\(true\)')): + url_or_request = args[0] + url = (url_or_request.get_full_url() + if isinstance(url_or_request, compat_urllib_request.Request) + else url_or_request) + phantom = PhantomJSwrapper(self, required_version='2.0') + phantom.get(url, html=webpage) + webpage, urlh = dl(*args, **kwargs) + + return webpage, urlh + + +class PornHubIE(PornHubBaseIE): + IE_DESC = 'PornHub and Thumbzilla' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:[^/]+\.)?(?Ppornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| + (?:www\.)?thumbzilla\.com/video/ + ) + (?P[\da-z]+) + ''' + _TESTS = [{ + 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', + 'md5': '1e19b41231a02eba417839222ac9d58e', + 'info_dict': { + 'id': '648719015', + 'ext': 'mp4', + 'title': 'Seductive Indian beauty strips down and fingers her pink pussy', + 'uploader': 'Babes', + 'upload_date': '20130628', + 'duration': 361, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 18, + 'tags': list, + 'categories': list, + }, + }, { + # non-ASCII title + 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', + 'info_dict': { + 'id': '1331683002', + 'ext': 'mp4', + 'title': '重庆婷婷女王足交', + 'uploader': 'Unknown', + 'upload_date': '20150213', + 'duration': 1753, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 18, + 'tags': list, + 'categories': list, + }, + 'params': { + 'skip_download': True, + }, + }, { + # subtitles + 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7', + 'info_dict': { + 'id': 'ph5af5fef7c2aa7', + 'ext': 'mp4', + 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor', + 'uploader': 
'BFFs', + 'duration': 622, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 18, + 'tags': list, + 'categories': list, + 'subtitles': { + 'en': [{ + "ext": 'srt' + }] + }, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', + 'only_matching': True, + }, { + # removed at the request of cam4.com + 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', + 'only_matching': True, + }, { + # removed at the request of the copyright owner + 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859', + 'only_matching': True, + }, { + # removed by uploader + 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', + 'only_matching': True, + }, { + # private video + 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7', + 'only_matching': True, + }, { + 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', + 'only_matching': True, + }, { + 'url': 'http://www.pornhub.com/video/show?viewkey=648719015', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r']+?src=["\'](?P(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)', + webpage) + + def _extract_count(self, pattern, webpage, name): + return str_to_int(self._search_regex( + pattern, webpage, '%s count' % name, fatal=False)) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') or 'pornhub.com' + video_id = mobj.group('id') + + self._set_cookie(host, 'age_verified', '1') + + def dl_webpage(platform): + self._set_cookie(host, 'platform', platform) + return self._download_webpage( + 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id), + video_id, 'Downloading %s webpage' % platform) + + webpage = dl_webpage('pc') + + error_msg = self._html_search_regex( + r'(?s)]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P.+?)', + webpage, 'error message', default=None, group='error') + if error_msg: + error_msg = re.sub(r'\s+', ' ', error_msg) + raise ExtractorError( + 'PornHub said: %s' % error_msg, + expected=True, video_id=video_id) + + # video_title from flashvars contains whitespace instead of non-ASCII (see + # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying + # on that anymore. 
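#     Illustrative sketch (not from the original patch) of what the
#     parse_js_value() helper defined further below does with the obfuscated
#     'mediastring' assignments scraped from the TV page; the variable names
#     and URL are invented:
#
#         assignments = ('var a="https://cd";var b="n.example.com/v.mp4";'
#                        'var mediastring=a+/* junk */b').split(';')
#         # after the assignment loop below, js_vars == {
#         #     'a': 'https://cd',
#         #     'b': 'n.example.com/v.mp4',
#         #     'mediastring': 'https://cdn.example.com/v.mp4',
#         # }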
+ title = self._html_search_meta( + 'twitter:title', webpage, default=None) or self._search_regex( + (r']+class=["\']title["\'][^>]*>(?P[^<]+)', + r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', + r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), + webpage, 'title', group='title') + + video_urls = [] + video_urls_set = set() + subtitles = {} + + flashvars = self._parse_json( + self._search_regex( + r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), + video_id) + if flashvars: + subtitle_url = url_or_none(flashvars.get('closedCaptionsFile')) + if subtitle_url: + subtitles.setdefault('en', []).append({ + 'url': subtitle_url, + 'ext': 'srt', + }) + thumbnail = flashvars.get('image_url') + duration = int_or_none(flashvars.get('video_duration')) + media_definitions = flashvars.get('mediaDefinitions') + if isinstance(media_definitions, list): + for definition in media_definitions: + if not isinstance(definition, dict): + continue + video_url = definition.get('videoUrl') + if not video_url or not isinstance(video_url, compat_str): + continue + if video_url in video_urls_set: + continue + video_urls_set.add(video_url) + video_urls.append( + (video_url, int_or_none(definition.get('quality')))) + else: + thumbnail, duration = [None] * 2 + + if not video_urls: + tv_webpage = dl_webpage('tv') + + assignments = self._search_regex( + r'(var.+?mediastring.+?)</script>', tv_webpage, + 'encoded url').split(';') + + js_vars = {} + + def parse_js_value(inp): + inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) + if '+' in inp: + inps = inp.split('+') + return functools.reduce( + operator.concat, map(parse_js_value, inps)) + inp = inp.strip() + if inp in js_vars: + return js_vars[inp] + return remove_quotes(inp) + + for assn in assignments: + assn = assn.strip() + if not assn: + continue + assn = re.sub(r'var\s+', '', assn) + vname, value = assn.split('=', 1) + js_vars[vname] = parse_js_value(value) + + video_url = js_vars['mediastring'] + if video_url not in video_urls_set: + video_urls.append((video_url, None)) + video_urls_set.add(video_url) + + for mobj in re.finditer( + r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage): + video_url = mobj.group('url') + if video_url not in video_urls_set: + video_urls.append((video_url, None)) + video_urls_set.add(video_url) + + upload_date = None + formats = [] + for video_url, height in video_urls: + if not upload_date: + upload_date = self._search_regex( + r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) + if upload_date: + upload_date = upload_date.replace('/', '') + if determine_ext(video_url) == 'mpd': + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + continue + tbr = None + mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url) + if mobj: + if not height: + height = int(mobj.group('height')) + tbr = int(mobj.group('tbr')) + formats.append({ + 'url': video_url, + 'format_id': '%dp' % height if height else None, + 'height': height, + 'tbr': tbr, + }) + self._sort_formats(formats) + + video_uploader = self._html_search_regex( + r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', + webpage, 'uploader', fatal=False) + + view_count = self._extract_count( + r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') + like_count = self._extract_count( + r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like') + dislike_count = self._extract_count( + 
r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike') + comment_count = self._extract_count( + r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') + + def extract_list(meta_key): + div = self._search_regex( + r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>' + % meta_key, webpage, meta_key, default=None) + if div: + return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div) + + return { + 'id': video_id, + 'uploader': video_uploader, + 'upload_date': upload_date, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'comment_count': comment_count, + 'formats': formats, + 'age_limit': 18, + 'tags': extract_list('tags'), + 'categories': extract_list('categories'), + 'subtitles': subtitles, + } + + +class PornHubPlaylistBaseIE(PornHubBaseIE): + def _extract_entries(self, webpage, host): + # Only process container div with main playlist content skipping + # drop-down menu that uses similar pattern for videos (see + # https://github.com/ytdl-org/youtube-dl/issues/11594). + container = self._search_regex( + r'(?s)(<div[^>]+class=["\']container.+)', webpage, + 'container', default=webpage) + + return [ + self.url_result( + 'http://www.%s/%s' % (host, video_url), + PornHubIE.ie_key(), video_title=title) + for video_url, title in orderedSet(re.findall( + r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', + container)) + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + playlist_id = mobj.group('id') + + webpage = self._download_webpage(url, playlist_id) + + entries = self._extract_entries(webpage, host) + + playlist = self._parse_json( + self._search_regex( + r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage, + 'playlist', default='{}'), + playlist_id, fatal=False) + title = playlist.get('title') or self._search_regex( + r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False) + + return self.playlist_result( + entries, playlist_id, title, playlist.get('description')) + + +class PornHubUserIE(PornHubPlaylistBaseIE): + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' + _TESTS = [{ + 'url': 'https://www.pornhub.com/model/zoe_ph', + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious', + 'info_dict': { + 'id': 'liz-vicious', + }, + 'playlist_mincount': 118, + }, { + 'url': 'https://www.pornhub.com/users/russianveet69', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/channels/povd', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + user_id = mobj.group('id') + return self.url_result( + '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(), + video_id=user_id) + + +class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + item_id = mobj.group('id') + + page = int_or_none(self._search_regex( + r'\bpage=(\d+)', url, 'page', default=None)) + + page_url = self._make_page_url(url) + + entries = [] + for page_num in (page, ) if page is not None else itertools.count(1): + try: + webpage = self._download_webpage( + page_url, item_id, 'Downloading page %d' % page_num, + 
query={'page': page_num}) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + break + raise + page_entries = self._extract_entries(webpage, host) + if not page_entries: + break + entries.extend(page_entries) + if not self._has_more(webpage): + break + + return self.playlist_result(orderedSet(entries), item_id) + + +class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.pornhub.com/model/zoe_ph/videos', + 'only_matching': True, + }, { + 'url': 'http://www.pornhub.com/users/rushandlia/videos', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', + 'info_dict': { + 'id': 'pornstar/jenny-blighe/videos', + }, + 'playlist_mincount': 149, + }, { + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', + 'info_dict': { + 'id': 'pornstar/jenny-blighe/videos', + }, + 'playlist_mincount': 40, + }, { + # default sorting as Top Rated Videos + 'url': 'https://www.pornhub.com/channels/povd/videos', + 'info_dict': { + 'id': 'channels/povd/videos', + }, + 'playlist_mincount': 293, + }, { + # Top Rated Videos + 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra', + 'only_matching': True, + }, { + # Most Recent Videos + 'url': 'https://www.pornhub.com/channels/povd/videos?o=da', + 'only_matching': True, + }, { + # Most Viewed Videos + 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi', + 'only_matching': True, + }, { + 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', + 'only_matching': True, + }, { + # Most Viewed Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', + 'only_matching': True, + }, { + # Top Rated Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', + 'only_matching': True, + }, { + # Longest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', + 'only_matching': True, + }, { + # Newest Videos + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/search?search=123', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/categories/teen?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/hd?page=3', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/described-video?page=2', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', + 'only_matching': True, + }, { + 'url': 'https://www.pornhub.com/playlist/44121572', + 'info_dict': { + 'id': 'playlist/44121572', + }, + 'playlist_mincount': 132, + }, { + 'url': 'https://www.pornhub.com/playlist/4667351', + 'only_matching': True, + }, { + 'url': 'https://de.pornhub.com/playlist/4667351', + 'only_matching': True, + }] + + @classmethod + def 
suitable(cls, url): + return (False + if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) + else super(PornHubPagedVideoListIE, cls).suitable(url)) + + def _make_page_url(self, url): + return url + + @staticmethod + def _has_more(webpage): + return re.search( + r'''(?x) + <li[^>]+\bclass=["\']page_next| + <link[^>]+\brel=["\']next| + <button[^>]+\bid=["\']moreDataBtn + ''', webpage) is not None + + +class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' + _TESTS = [{ + 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', + 'info_dict': { + 'id': 'jenny-blighe', + }, + 'playlist_mincount': 129, + }, { + 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', + 'only_matching': True, + }] + + def _make_page_url(self, url): + mobj = re.match(self._VALID_URL, url) + return '%s/ajax' % mobj.group('url') + + @staticmethod + def _has_more(webpage): + return True diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dl/extractor/pornotube.py new file mode 100644 index 000000000..1b5b9a320 --- /dev/null +++ b/youtube_dl/extractor/pornotube.py @@ -0,0 +1,85 @@ +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import int_or_none + + +class PornotubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science', + 'md5': '60fc5a4f0d93a97968fc7999d98260c9', + 'info_dict': { + 'id': '4964', + 'ext': 'mp4', + 'upload_date': '20141203', + 'title': 'Weird Hot and Wet Science', + 'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0', + 'categories': ['Adult Humor', 'Blondes'], + 'uploader': 'Alpha Blue Archives', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1417582800, + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + token = self._download_json( + 'https://api.aebn.net/auth/v2/origins/authenticate', + video_id, note='Downloading token', + data=json.dumps({'credentials': 'Clip Application'}).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + 'Origin': 'http://www.pornotube.com', + })['tokenKey'] + + video_url = self._download_json( + 'https://api.aebn.net/delivery/v1/clips/%s/MP4' % video_id, + video_id, note='Downloading delivery information', + headers={'Authorization': token})['mediaUrl'] + + FIELDS = ( + 'title', 'description', 'startSecond', 'endSecond', 'publishDate', + 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber' + ) + + info = self._download_json( + 'https://api.aebn.net/content/v2/clips/%s?fields=%s' + % (video_id, ','.join(FIELDS)), video_id, + note='Downloading metadata', + headers={'Authorization': token}) + + if isinstance(info, list): + info = info[0] + + title = info['title'] + + timestamp = int_or_none(info.get('publishDate'), scale=1000) + uploader = info.get('studios', [{}])[0].get('name') + movie_id = info.get('movieId') + primary_image_number = info.get('primaryImageNumber') + thumbnail = None + if movie_id and primary_image_number: + thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % ( + movie_id, movie_id, primary_image_number) + start = int_or_none(info.get('startSecond')) + end = int_or_none(info.get('endSecond')) + duration = end - start if start and 
end else None + categories = [c['name'] for c in info.get('categories', []) if c.get('name')] + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': info.get('description'), + 'duration': duration, + 'timestamp': timestamp, + 'uploader': uploader, + 'thumbnail': thumbnail, + 'categories': categories, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py new file mode 100644 index 000000000..b6b71069d --- /dev/null +++ b/youtube_dl/extractor/pornovoisines.py @@ -0,0 +1,108 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, + unified_strdate, +) + + +class PornoVoisinesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)' + + _TEST = { + 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', + 'md5': '6f8aca6a058592ab49fe701c8ba8317b', + 'info_dict': { + 'id': '919', + 'display_id': 'recherche-appartement', + 'ext': 'mp4', + 'title': 'Recherche appartement', + 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20140925', + 'duration': 120, + 'view_count': int, + 'average_rating': float, + 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'], + 'age_limit': 18, + 'subtitles': { + 'fr': [{ + 'ext': 'vtt', + }] + }, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + settings_url = self._download_json( + 'http://www.pornovoisines.com/api/video/%s/getsettingsurl/' % video_id, + video_id, note='Getting settings URL')['video_settings_url'] + settings = self._download_json(settings_url, video_id)['data'] + + formats = [] + for kind, data in settings['variants'].items(): + if kind == 'HLS': + formats.extend(self._extract_m3u8_formats( + data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) + elif kind == 'MP4': + for item in data: + formats.append({ + 'url': item['url'], + 'height': item.get('height'), + 'bitrate': item.get('bitrate'), + }) + self._sort_formats(formats) + + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + # The webpage has a bug - there's no space between "thumb" and src= + thumbnail = self._html_search_regex( + r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2', + webpage, 'thumbnail', fatal=False, group='url') + + upload_date = unified_strdate(self._search_regex( + r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False)) + duration = settings.get('main', {}).get('duration') + view_count = int_or_none(self._search_regex( + r'(\d+) vues', webpage, 'view count', fatal=False)) + average_rating = self._search_regex( + r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False) + if average_rating: + average_rating = float_or_none(average_rating.replace(',', '.')) + + categories = self._html_search_regex( + r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False) + if categories: + categories = [category.strip() for category in categories.split(',')] + + subtitles = {'fr': [{ + 'url': subtitle, + } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} + + return { + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 
'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'average_rating': average_rating, + 'categories': categories, + 'age_limit': 18, + 'subtitles': subtitles, + } diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dl/extractor/pornoxo.py new file mode 100644 index 000000000..2831368b6 --- /dev/null +++ b/youtube_dl/extractor/pornoxo.py @@ -0,0 +1,58 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + str_to_int, +) + + +class PornoXOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' + _TEST = { + 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', + 'md5': '582f28ecbaa9e6e24cb90f50f524ce87', + 'info_dict': { + 'id': '7564', + 'ext': 'flv', + 'title': 'Striptease From Sexy Secretary!', + 'display_id': 'striptease-from-sexy-secretary', + 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', + 'categories': list, # NSFW + 'thumbnail': r're:https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id, display_id = mobj.groups() + + webpage = self._download_webpage(url, video_id) + video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False) + + title = self._html_search_regex( + r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title') + + view_count = str_to_int(self._html_search_regex( + r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) + + categories_str = self._html_search_regex( + r'<meta name="description" content=".*featuring\s*([^"]+)"', + webpage, 'categories', fatal=False) + categories = ( + None if categories_str is None + else categories_str.split(',')) + + video_data.update({ + 'id': video_id, + 'title': title, + 'display_id': display_id, + 'description': self._html_search_meta('description', webpage), + 'categories': categories, + 'view_count': view_count, + 'age_limit': 18, + }) + + return video_data diff --git a/youtube_dl/extractor/presstv.py b/youtube_dl/extractor/presstv.py new file mode 100644 index 000000000..b5c279203 --- /dev/null +++ b/youtube_dl/extractor/presstv.py @@ -0,0 +1,74 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import remove_start + + +class PressTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?' 
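#     Illustrative sketch (not from the original patch) of how the named
#     groups of _VALID_URL above feed _real_extract() below, using the URL
#     from the test case that follows:
#
#         import re
#         m = re.match(PressTVIE._VALID_URL,
#                      'http://www.presstv.ir/Detail/2016/04/09/459911/'
#                      'Australian-sewerage-treatment-facility-/')
#         # m.group('y', 'm', 'd') == ('2016', '04', '09')
#         # m.group('id') == '459911'
#         # upload_date == '%04d%02d%02d' % (2016, 4, 9) == '20160409'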
+ + _TEST = { + 'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/', + 'md5': '5d7e3195a447cb13e9267e931d8dd5a5', + 'info_dict': { + 'id': '459911', + 'display_id': 'Australian-sewerage-treatment-facility-', + 'ext': 'mp4', + 'title': 'Organic mattresses used to clean waste water', + 'upload_date': '20160409', + 'thumbnail': r're:^https?://.*\.jpg', + 'description': 'md5:20002e654bbafb6908395a5c0cfcd125' + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') or video_id + + webpage = self._download_webpage(url, display_id) + + # extract video URL from webpage + video_url = self._hidden_inputs(webpage)['inpPlayback'] + + # build list of available formats + # specified in http://www.presstv.ir/Scripts/playback.js + base_url = 'http://192.99.219.222:82/presstv' + _formats = [ + (180, '_low200.mp4'), + (360, '_low400.mp4'), + (720, '_low800.mp4'), + (1080, '.mp4') + ] + + formats = [{ + 'url': base_url + video_url[:-4] + extension, + 'format_id': '%dp' % height, + 'height': height, + } for height, extension in _formats] + + # extract video metadata + title = remove_start( + self._html_search_meta('title', webpage, fatal=True), 'PressTV-') + + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage) + + upload_date = '%04d%02d%02d' % ( + int(mobj.group('y')), + int(mobj.group('m')), + int(mobj.group('d')), + ) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'description': description + } diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py new file mode 100644 index 000000000..23ac93d7e --- /dev/null +++ b/youtube_dl/extractor/promptfile.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + ExtractorError, + urlencode_postdata, +) + + +class PromptFileIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)' + _TEST = { + 'url': 'http://www.promptfile.com/l/86D1CE8462-576CAAE416', + 'md5': '5a7e285a26e0d66d9a263fae91bc92ce', + 'info_dict': { + 'id': '86D1CE8462-576CAAE416', + 'ext': 'mp4', + 'title': 'oceans.mp4', + 'thumbnail': r're:^https?://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None: + raise ExtractorError('Video %s does not exist' % video_id, + expected=True) + + chash = self._search_regex( + r'val\("([^"]*)"\s*\+\s*\$\("#chash"\)', webpage, 'chash') + fields = self._hidden_inputs(webpage) + keys = list(fields.keys()) + chash_key = keys[0] if len(keys) == 1 else next( + key for key in keys if key.startswith('cha')) + fields[chash_key] = chash + fields[chash_key] + + webpage = self._download_webpage( + url, video_id, 'Downloading video page', + data=urlencode_postdata(fields), + headers={'Content-type': 'application/x-www-form-urlencoded'}) + + video_url = self._search_regex( + (r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*Download File', + r'<a[^>]+href=(["\'])(?P<url>https?://(?:www\.)?promptfile\.com/file/(?:(?!\1).)+)\1'), + webpage, 'video url', group='url') + title = self._html_search_regex( + r'<span.+title="([^"]+)">', 
webpage, 'title') + thumbnail = self._html_search_regex( + r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"', + webpage, 'thumbnail', fatal=False, flags=re.DOTALL) + + formats = [{ + 'format_id': 'sd', + 'url': video_url, + 'ext': determine_ext(title), + }] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py new file mode 100644 index 000000000..e19a470a5 --- /dev/null +++ b/youtube_dl/extractor/prosiebensat1.py @@ -0,0 +1,500 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from hashlib import sha1 +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + ExtractorError, + determine_ext, + float_or_none, + int_or_none, + unified_strdate, +) + + +class ProSiebenSat1BaseIE(InfoExtractor): + _GEO_COUNTRIES = ['DE'] + _ACCESS_ID = None + _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' + _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' + + def _extract_video_info(self, url, clip_id): + client_location = url + + video = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos', + clip_id, 'Downloading videos JSON', query={ + 'access_token': self._TOKEN, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'ids': clip_id, + })[0] + + if video.get('is_protected') is True: + raise ExtractorError('This video is DRM protected.', expected=True) + + formats = [] + if self._ACCESS_ID: + raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID + server_token = (self._download_json( + self._V4_BASE_URL + 'protocols', clip_id, + 'Downloading protocols JSON', + headers=self.geo_verification_headers(), query={ + 'access_id': self._ACCESS_ID, + 'client_token': sha1((raw_ct).encode()).hexdigest(), + 'video_id': clip_id, + }, fatal=False) or {}).get('server_token') + if server_token: + urls = (self._download_json( + self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ + 'access_id': self._ACCESS_ID, + 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(), + 'protocols': self._SUPPORTED_PROTOCOLS, + 'server_token': server_token, + 'video_id': clip_id, + }, fatal=False) or {}).get('urls') or {} + for protocol, variant in urls.items(): + source_url = variant.get('clear', {}).get('url') + if not source_url: + continue + if protocol == 'dash': + formats.extend(self._extract_mpd_formats( + source_url, clip_id, mpd_id=protocol, fatal=False)) + elif protocol == 'hls': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id=protocol, fatal=False)) + else: + formats.append({ + 'url': source_url, + 'format_id': protocol, + }) + if not formats: + source_ids = [compat_str(source['id']) for source in video['sources']] + + client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + + sources = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id, + clip_id, 'Downloading sources JSON', query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + }) + server_id = sources['server_id'] + + def fix_bitrate(bitrate): + bitrate = int_or_none(bitrate) + if not bitrate: + return None + return (bitrate // 1000) if 
bitrate % 1000 == 0 else bitrate + + for source_id in source_ids: + client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest() + urls = self._download_json( + 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id, + clip_id, 'Downloading urls JSON', fatal=False, query={ + 'access_token': self._TOKEN, + 'client_id': client_id, + 'client_location': client_location, + 'client_name': self._CLIENT_NAME, + 'server_id': server_id, + 'source_ids': source_id, + }) + if not urls: + continue + if urls.get('status_code') != 0: + raise ExtractorError('This video is unavailable', expected=True) + urls_sources = urls['sources'] + if isinstance(urls_sources, dict): + urls_sources = urls_sources.values() + for source in urls_sources: + source_url = source.get('url') + if not source_url: + continue + protocol = source.get('protocol') + mimetype = source.get('mimetype') + if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': + formats.extend(self._extract_f4m_formats( + source_url, clip_id, f4m_id='hds', fatal=False)) + elif mimetype == 'application/x-mpegURL': + formats.extend(self._extract_m3u8_formats( + source_url, clip_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif mimetype == 'application/dash+xml': + formats.extend(self._extract_mpd_formats( + source_url, clip_id, mpd_id='dash', fatal=False)) + else: + tbr = fix_bitrate(source['bitrate']) + if protocol in ('rtmp', 'rtmpe'): + mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) + if not mobj: + continue + path = mobj.group('path') + mp4colon_index = path.rfind('mp4:') + app = path[:mp4colon_index] + play_path = path[mp4colon_index:] + formats.append({ + 'url': '%s/%s' % (mobj.group('url'), app), + 'app': app, + 'play_path': play_path, + 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf', + 'page_url': 'http://www.prosieben.de', + 'tbr': tbr, + 'ext': 'flv', + 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''), + }) + else: + formats.append({ + 'url': source_url, + 'tbr': tbr, + 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''), + }) + self._sort_formats(formats) + + return { + 'duration': float_or_none(video.get('duration')), + 'formats': formats, + } + + +class ProSiebenSat1IE(ProSiebenSat1BaseIE): + IE_NAME = 'prosiebensat1' + IE_DESC = 'ProSiebenSat.1 Digital' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)? + (?: + (?:beta\.)? 
+ (?: + prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia + )\.(?:de|at|ch)| + ran\.de|fem\.com|advopedia\.de|galileo\.tv/video + ) + /(?P<id>.+) + ''' + + _TESTS = [ + { + # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242 + # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215: + # - malformed f4m manifest support + # - proper handling of URLs starting with `https?://` in 2.0 manifests + # - recursive child f4m manifests extraction + 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', + 'info_dict': { + 'id': '2104602', + 'ext': 'mp4', + 'title': 'Episode 18 - Staffel 2', + 'description': 'md5:8733c81b702ea472e069bc48bb658fc1', + 'upload_date': '20131231', + 'duration': 5845.04, + }, + }, + { + 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html', + 'info_dict': { + 'id': '2570327', + 'ext': 'mp4', + 'title': 'Lady-Umstyling für Audrina', + 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d', + 'upload_date': '20131014', + 'duration': 606.76, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'Seems to be broken', + }, + { + 'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge', + 'info_dict': { + 'id': '2429369', + 'ext': 'mp4', + 'title': 'Countdown für die Autowerkstatt', + 'description': 'md5:809fc051a457b5d8666013bc40698817', + 'upload_date': '20140223', + 'duration': 2595.04, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip', + 'info_dict': { + 'id': '2904997', + 'ext': 'mp4', + 'title': 'Sexy laufen in Ugg Boots', + 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6', + 'upload_date': '20140122', + 'duration': 245.32, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip', + 'info_dict': { + 'id': '2906572', + 'ext': 'mp4', + 'title': 'Im Interview: Kai Wiesinger', + 'description': 'md5:e4e5370652ec63b95023e914190b4eb9', + 'upload_date': '20140203', + 'duration': 522.56, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge', + 'info_dict': { + 'id': '2992323', + 'ext': 'mp4', + 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2', + 'description': 'md5:2669cde3febe9bce13904f701e774eb6', + 'upload_date': '20141014', + 'duration': 2410.44, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge', + 'info_dict': { + 'id': '3004256', + 'ext': 'mp4', + 'title': 'Schalke: Tönnies möchte Raul zurück', + 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f', + 'upload_date': '20140226', + 'duration': 228.96, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip', + 
'info_dict': { + 'id': '2572814', + 'ext': 'mp4', + 'title': 'Andreas Kümmert: Rocket Man', + 'description': 'md5:6ddb02b0781c6adf778afea606652e38', + 'upload_date': '20131017', + 'duration': 469.88, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html', + 'info_dict': { + 'id': '2156342', + 'ext': 'mp4', + 'title': 'Kurztrips zum Valentinstag', + 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.', + 'duration': 307.24, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', + 'info_dict': { + 'id': '439664', + 'title': 'Episode 8 - Ganze Folge - Playlist', + 'description': 'md5:63b8963e71f481782aeea877658dec84', + }, + 'playlist_count': 2, + 'skip': 'This video is unavailable', + }, + { + 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge', + 'info_dict': { + 'id': '4187506', + 'ext': 'mp4', + 'title': 'Best of Circus HalliGalli', + 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9', + 'upload_date': '20151229', + }, + 'params': { + 'skip_download': True, + }, + }, + { + # title in <h2 class="subtitle"> + 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip', + 'info_dict': { + 'id': '4895826', + 'ext': 'mp4', + 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe', + 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9', + 'upload_date': '20170302', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'geo restricted to Germany', + }, + { + # geo restricted to Germany + 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge', + 'only_matching': True, + }, + { + # geo restricted to Germany + 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge', + 'only_matching': True, + }, + { + # geo restricted to Germany + 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden', + 'only_matching': True, + }, + { + 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel', + 'only_matching': True, + }, + { + 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage', + 'only_matching': True, + }, + ] + + _TOKEN = 'prosieben' + _SALT = '01!8d8F_)r9]4s[qeuXfP%' + _CLIENT_NAME = 'kolibri-2.0.19-splec4' + + _ACCESS_ID = 'x_prosiebenmaxx-de' + _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag' + _IV = 'Aeluchoc6aevechuipiexeeboowedaok' + + _CLIPID_REGEXES = [ + r'"clip_id"\s*:\s+"(\d+)"', + r'clipid: "(\d+)"', + r'clip[iI]d=(\d+)', + r'clip[iI][dD]\s*=\s*["\'](\d+)', + r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)", + r'proMamsId"\s*:\s*"(\d+)', + r'proMamsId"\s*:\s*"(\d+)', + ] + _TITLE_REGEXES = [ + r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', + r'<header class="clearfix">\s*<h3>(.+?)</h3>', + r'<!-- start video -->\s*<h1>(.+?)</h1>', + r'<h1 class="att-name">\s*(.+?)</h1>', + r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>', + r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>', + r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>', + r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>', + ] + _DESCRIPTION_REGEXES = [ + r'<p itemprop="description">\s*(.+?)</p>', + 
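Each of the regex lists defined here is handed to _html_search_regex, which tries the patterns in order and keeps the first capture. The same first-match-wins idea as a tiny helper (first_match is illustrative, not part of youtube-dl):

import re

def first_match(patterns, webpage):
    # try each pattern in order; the first group-1 hit wins
    for pattern in patterns:
        mobj = re.search(pattern, webpage)
        if mobj:
            return mobj.group(1)
    return None

# first_match([r'clipid: "(\d+)"', r'clip[iI]d=(\d+)'], 'x clipid: "2104602" y') == '2104602'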
r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>', + r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>', + r'<p class="att-description">\s*(.+?)\s*</p>', + r'<p class="video-description" itemprop="description">\s*(.+?)</p>', + r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>', + ] + _UPLOAD_DATE_REGEXES = [ + r'<meta property="og:published_time" content="(.+?)">', + r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"', + r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr', + r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>', + r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>', + ] + _PAGE_TYPE_REGEXES = [ + r'<meta name="page_type" content="([^"]+)">', + r"'itemType'\s*:\s*'([^']*)'", + ] + _PLAYLIST_ID_REGEXES = [ + r'content[iI]d=(\d+)', + r"'itemId'\s*:\s*'([^']*)'", + ] + _PLAYLIST_CLIP_REGEXES = [ + r'(?s)data-qvt=.+?<a href="([^"]+)"', + ] + + def _extract_clip(self, url, webpage): + clip_id = self._html_search_regex( + self._CLIPID_REGEXES, webpage, 'clip id') + title = self._html_search_regex( + self._TITLE_REGEXES, webpage, 'title', + default=None) or self._og_search_title(webpage) + info = self._extract_video_info(url, clip_id) + description = self._html_search_regex( + self._DESCRIPTION_REGEXES, webpage, 'description', default=None) + if description is None: + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + upload_date = unified_strdate(self._html_search_regex( + self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None)) + + info.update({ + 'id': clip_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + }) + return info + + def _extract_playlist(self, url, webpage): + playlist_id = self._html_search_regex( + self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') + playlist = self._parse_json( + self._search_regex( + r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script', + webpage, 'playlist'), + playlist_id) + entries = [] + for item in playlist: + clip_id = item.get('id') or item.get('upc') + if not clip_id: + continue + info = self._extract_video_info(url, clip_id) + info.update({ + 'id': clip_id, + 'title': item.get('title') or item.get('teaser', {}).get('headline'), + 'description': item.get('teaser', {}).get('description'), + 'thumbnail': item.get('poster'), + 'duration': float_or_none(item.get('duration')), + 'series': item.get('tvShowTitle'), + 'uploader': item.get('broadcastPublisher'), + }) + entries.append(info) + return self.playlist_result(entries, playlist_id) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + page_type = self._search_regex( + self._PAGE_TYPE_REGEXES, webpage, + 'page type', default='clip').lower() + if page_type == 'clip': + return self._extract_clip(url, webpage) + elif page_type == 'playlist': + return self._extract_playlist(url, webpage) + else: + raise ExtractorError( + 'Unsupported page type %s' % page_type, expected=True) diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py new file mode 100644 index 000000000..5465e8ab7 --- /dev/null +++ b/youtube_dl/extractor/puhutv.py @@ -0,0 +1,247 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_HTTPError, + compat_str, +) +from ..utils import ( + ExtractorError, + int_or_none, + 
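For playlist pages, _extract_playlist above pulls a contentResources JSON array straight out of the page source via _search_regex plus _parse_json. A rough standalone equivalent of that scrape:

import json
import re

def parse_content_resources(webpage):
    # grab 'var contentResources = [...];' and decode it, mirroring the
    # regex the extractor uses
    mobj = re.search(
        r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script', webpage)
    return json.loads(mobj.group(1)) if mobj else []

# parse_content_resources('var contentResources = [{"id": 1}];</script>') == [{'id': 1}]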
float_or_none, + parse_resolution, + str_or_none, + try_get, + unified_timestamp, + url_or_none, + urljoin, +) + + +class PuhuTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle' + IE_NAME = 'puhutv' + _TESTS = [{ + # film + 'url': 'https://puhutv.com/sut-kardesler-izle', + 'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7', + 'info_dict': { + 'id': '5085', + 'display_id': 'sut-kardesler', + 'ext': 'mp4', + 'title': 'Süt Kardeşler', + 'description': 'md5:405fd024df916ca16731114eb18e511a', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 4832.44, + 'creator': 'Arzu Film', + 'timestamp': 1469778212, + 'upload_date': '20160729', + 'release_year': 1976, + 'view_count': int, + 'tags': ['Aile', 'Komedi', 'Klasikler'], + }, + }, { + # episode, geo restricted, bypassable with --geo-verification-proxy + 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle', + 'only_matching': True, + }, { + # 4k, with subtitles + 'url': 'https://puhutv.com/dip-1-bolum-izle', + 'only_matching': True, + }] + _SUBTITLE_LANGS = { + 'English': 'en', + 'Deutsch': 'de', + 'عربى': 'ar' + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + info = self._download_json( + urljoin(url, '/api/slug/%s-izle' % display_id), + display_id)['data'] + + video_id = compat_str(info['id']) + title = info.get('name') or info['title']['name'] + if info.get('display_name'): + title = '%s %s' % (title, info.get('display_name')) + + try: + videos = self._download_json( + 'https://puhutv.com/api/assets/%s/videos' % video_id, + display_id, 'Downloading video JSON', + headers=self.geo_verification_headers()) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self.raise_geo_restricted() + raise + + formats = [] + for video in videos['data']['videos']: + media_url = url_or_none(video.get('url')) + if not media_url: + continue + playlist = video.get('is_playlist') + if video.get('stream_type') == 'hls' and playlist is True: + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue + quality = int_or_none(video.get('quality')) + f = { + 'url': media_url, + 'ext': 'mp4', + 'height': quality + } + video_format = video.get('video_format') + if video_format == 'hls' and playlist is False: + format_id = 'hls' + f['protocol'] = 'm3u8_native' + elif video_format == 'mp4': + format_id = 'http' + + else: + continue + if quality: + format_id += '-%sp' % quality + f['format_id'] = format_id + formats.append(f) + self._sort_formats(formats) + + description = try_get( + info, lambda x: x['title']['description'], + compat_str) or info.get('description') + timestamp = unified_timestamp(info.get('created_at')) + creator = try_get( + info, lambda x: x['title']['producer']['name'], compat_str) + + duration = float_or_none( + try_get(info, lambda x: x['content']['duration_in_ms'], int), + scale=1000) + view_count = try_get(info, lambda x: x['content']['watch_count'], int) + + images = try_get( + info, lambda x: x['content']['images']['wide'], dict) or {} + thumbnails = [] + for image_id, image_url in images.items(): + if not isinstance(image_url, compat_str): + continue + if not image_url.startswith(('http', '//')): + image_url = 'https://%s' % image_url + t = parse_resolution(image_id) + t.update({ + 'id': image_id, + 'url': image_url + }) + thumbnails.append(t) + + release_year = try_get(info, lambda x: x['title']['released_at'], int) + + season_number = 
int_or_none(info.get('season_number')) + season_id = str_or_none(info.get('season_id')) + episode_number = int_or_none(info.get('episode_number')) + + tags = [] + for genre in try_get(info, lambda x: x['title']['genres'], list) or []: + if not isinstance(genre, dict): + continue + genre_name = genre.get('name') + if genre_name and isinstance(genre_name, compat_str): + tags.append(genre_name) + + subtitles = {} + for subtitle in try_get( + info, lambda x: x['content']['subtitles'], list) or []: + if not isinstance(subtitle, dict): + continue + lang = subtitle.get('language') + sub_url = url_or_none(subtitle.get('url')) + if not lang or not isinstance(lang, compat_str) or not sub_url: + continue + subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ + 'url': sub_url + }] + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'season_id': season_id, + 'season_number': season_number, + 'episode_number': episode_number, + 'release_year': release_year, + 'timestamp': timestamp, + 'creator': creator, + 'view_count': view_count, + 'duration': duration, + 'tags': tags, + 'subtitles': subtitles, + 'thumbnails': thumbnails, + 'formats': formats + } + + +class PuhuTVSerieIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay' + IE_NAME = 'puhutv:serie' + _TESTS = [{ + 'url': 'https://puhutv.com/deniz-yildizi-detay', + 'info_dict': { + 'title': 'Deniz Yıldızı', + 'id': 'deniz-yildizi', + }, + 'playlist_mincount': 205, + }, { + # a film detail page which is using same url with serie page + 'url': 'https://puhutv.com/kaybedenler-kulubu-detay', + 'only_matching': True, + }] + + def _extract_entries(self, seasons): + for season in seasons: + season_id = season.get('id') + if not season_id: + continue + page = 1 + has_more = True + while has_more is True: + season = self._download_json( + 'https://galadriel.puhutv.com/seasons/%s' % season_id, + season_id, 'Downloading page %s' % page, query={ + 'page': page, + 'per': 40, + }) + episodes = season.get('episodes') + if isinstance(episodes, list): + for ep in episodes: + slug_path = str_or_none(ep.get('slugPath')) + if not slug_path: + continue + video_id = str_or_none(int_or_none(ep.get('id'))) + yield self.url_result( + 'https://puhutv.com/%s' % slug_path, + ie=PuhuTVIE.ie_key(), video_id=video_id, + video_title=ep.get('name') or ep.get('eventLabel')) + page += 1 + has_more = season.get('hasMore') + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + info = self._download_json( + urljoin(url, '/api/slug/%s-detay' % playlist_id), + playlist_id)['data'] + + seasons = info.get('seasons') + if seasons: + return self.playlist_result( + self._extract_entries(seasons), playlist_id, info.get('name')) + + # For films, these are using same url with series + video_id = info.get('slug') or info['assets'][0]['slug'] + return self.url_result( + 'https://puhutv.com/%s-izle' % video_id, + PuhuTVIE.ie_key(), video_id) diff --git a/youtube_dl/extractor/puls4.py b/youtube_dl/extractor/puls4.py new file mode 100644 index 000000000..80091b85f --- /dev/null +++ b/youtube_dl/extractor/puls4.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .prosiebensat1 import ProSiebenSat1BaseIE +from ..utils import ( + unified_strdate, + parse_duration, + compat_str, +) + + +class Puls4IE(ProSiebenSat1BaseIE): + _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)' + _TESTS = [{ + 'url': 
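PuhuTVSerieIE._extract_entries above pages through each season until the API stops reporting hasMore. The pagination pattern in isolation, with fetch_page standing in for the galadriel.puhutv.com JSON call:

def iter_episodes(fetch_page):
    # fetch_page(page) -> {'episodes': [...], 'hasMore': bool}
    page = 1
    has_more = True
    while has_more:
        season = fetch_page(page)
        for episode in season.get('episodes') or []:
            yield episode
        page += 1
        has_more = bool(season.get('hasMore'))

# list(iter_episodes(lambda page: {'episodes': [page], 'hasMore': page < 3})) -> [1, 2, 3]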
'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118', + 'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03', + 'info_dict': { + 'id': '118118', + 'ext': 'flv', + 'title': 'Tobias Homberger von myclubs im #2min2miotalk', + 'description': 'md5:f9def7c5e8745d6026d8885487d91955', + 'upload_date': '20160830', + 'uploader': 'PULS_4', + }, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer', + 'only_matching': True, + }, { + 'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598', + 'only_matching': True, + }] + _TOKEN = 'puls4' + _SALT = '01!kaNgaiNgah1Ie4AeSha' + _CLIENT_NAME = '' + + def _real_extract(self, url): + path = self._match_id(url) + content_path = self._download_json( + 'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url'] + media = self._download_json( + 'http://www.puls4.com' + content_path, + content_path)['mediaCurrent'] + player_content = media['playerContent'] + info = self._extract_video_info(url, player_content['id']) + info.update({ + 'id': compat_str(media['objectId']), + 'title': player_content['title'], + 'description': media.get('description'), + 'thumbnail': media.get('previewLink'), + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(player_content.get('duration')), + 'episode': player_content.get('episodePartName'), + 'show': media.get('channel'), + 'season_id': player_content.get('seasonId'), + 'uploader': player_content.get('sourceCompany'), + }) + return info diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dl/extractor/pyvideo.py new file mode 100644 index 000000000..b8ac93a62 --- /dev/null +++ b/youtube_dl/extractor/pyvideo.py @@ -0,0 +1,72 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import int_or_none + + +class PyvideoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)' + + _TESTS = [{ + 'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html', + 'info_dict': { + 'id': 'become-a-logging-expert-in-30-minutes', + }, + 'playlist_count': 2, + }, { + 'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html', + 'md5': '5fe1c7e0a8aa5570330784c847ff6d12', + 'info_dict': { + 'id': '2542', + 'ext': 'm4v', + 'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v', + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + category = mobj.group('category') + video_id = mobj.group('id') + + entries = [] + + data = self._download_json( + 'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json' + % (category, video_id), video_id, fatal=False) + + if data: + for video in data['videos']: + video_url = video.get('url') + if video_url: + if video.get('type') == 'youtube': + entries.append(self.url_result(video_url, 'Youtube')) + else: + entries.append({ + 'id': compat_str(data.get('id') or video_id), + 'url': video_url, + 'title': data['title'], + 'description': data.get('description') or data.get('summary'), + 'thumbnail': data.get('thumbnail_url'), + 'duration': int_or_none(data.get('duration')), + }) + else: + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + media_urls = self._search_regex( + r'(?s)Media 
URL:(.+?)</li>', webpage, 'media urls') + for m in re.finditer( + r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls): + media_url = m.group('url') + if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url): + entries.append(self.url_result(media_url, 'Youtube')) + else: + entries.append({ + 'id': video_id, + 'url': media_url, + 'title': title, + }) + + return self.playlist_result(entries, video_id) diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py new file mode 100644 index 000000000..084308aeb --- /dev/null +++ b/youtube_dl/extractor/qqmusic.py @@ -0,0 +1,369 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random +import re +import time + +from .common import InfoExtractor +from ..utils import ( + clean_html, + ExtractorError, + strip_jsonp, + unescapeHTML, +) + + +class QQMusicIE(InfoExtractor): + IE_NAME = 'qqmusic' + IE_DESC = 'QQ音乐' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/song/(?P<id>[0-9A-Za-z]+)\.html' + _TESTS = [{ + 'url': 'https://y.qq.com/n/yqq/song/004295Et37taLD.html', + 'md5': '5f1e6cea39e182857da7ffc5ef5e6bb8', + 'info_dict': { + 'id': '004295Et37taLD', + 'ext': 'mp3', + 'title': '可惜没如果', + 'release_date': '20141227', + 'creator': '林俊杰', + 'description': 'md5:d85afb3051952ecc50a1ee8a286d1eac', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }, { + 'note': 'There is no mp3-320 version of this song.', + 'url': 'https://y.qq.com/n/yqq/song/004MsGEo3DdNxV.html', + 'md5': 'fa3926f0c585cda0af8fa4f796482e3e', + 'info_dict': { + 'id': '004MsGEo3DdNxV', + 'ext': 'mp3', + 'title': '如果', + 'release_date': '20050626', + 'creator': '李季美', + 'description': 'md5:46857d5ed62bc4ba84607a805dccf437', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }, { + 'note': 'lyrics not in .lrc format', + 'url': 'https://y.qq.com/n/yqq/song/001JyApY11tIp6.html', + 'info_dict': { + 'id': '001JyApY11tIp6', + 'ext': 'mp3', + 'title': 'Shadows Over Transylvania', + 'release_date': '19970225', + 'creator': 'Dark Funeral', + 'description': 'md5:c9b20210587cbcd6836a1c597bab4525', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'skip_download': True, + }, + }] + + _FORMATS = { + 'mp3-320': {'prefix': 'M800', 'ext': 'mp3', 'preference': 40, 'abr': 320}, + 'mp3-128': {'prefix': 'M500', 'ext': 'mp3', 'preference': 30, 'abr': 128}, + 'm4a': {'prefix': 'C200', 'ext': 'm4a', 'preference': 10} + } + + # Reference: m_r_GetRUin() in top_player.js + # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js + @staticmethod + def m_r_get_ruin(): + curMs = int(time.time() * 1000) % 1000 + return int(round(random.random() * 2147483647) * curMs % 1E10) + + def _real_extract(self, url): + mid = self._match_id(url) + + detail_info_page = self._download_webpage( + 'http://s.plcloud.music.qq.com/fcgi-bin/fcg_yqq_song_detail_info.fcg?songmid=%s&play=0' % mid, + mid, note='Download song detail info', + errnote='Unable to get song detail info', encoding='gbk') + + song_name = self._html_search_regex( + r"songname:\s*'([^']+)'", detail_info_page, 'song name') + + publish_time = self._html_search_regex( + r'发行时间:(\d{4}-\d{2}-\d{2})', detail_info_page, + 'publish time', default=None) + if publish_time: + publish_time = publish_time.replace('-', '') + + singer = self._html_search_regex( + r"singer:\s*'([^']+)", detail_info_page, 'singer', default=None) + + lrc_content = self._html_search_regex( + r'<div class="content" id="lrc_content"[^<>]*>([^<>]+)</div>', + detail_info_page, 'LRC lyrics', default=None) + if lrc_content: + lrc_content = 
lrc_content.replace('\\n', '\n') + + thumbnail_url = None + albummid = self._search_regex( + [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], + detail_info_page, 'album mid', default=None) + if albummid: + thumbnail_url = 'http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg' \ + % (albummid[-2:-1], albummid[-1], albummid) + + guid = self.m_r_get_ruin() + + vkey = self._download_json( + 'http://base.music.qq.com/fcgi-bin/fcg_musicexpress.fcg?json=3&guid=%s' % guid, + mid, note='Retrieve vkey', errnote='Unable to get vkey', + transform_source=strip_jsonp)['key'] + + formats = [] + for format_id, details in self._FORMATS.items(): + formats.append({ + 'url': 'http://cc.stream.qqmusic.qq.com/%s%s.%s?vkey=%s&guid=%s&fromtag=0' + % (details['prefix'], mid, details['ext'], vkey, guid), + 'format': format_id, + 'format_id': format_id, + 'preference': details['preference'], + 'abr': details.get('abr'), + }) + self._check_formats(formats, mid) + self._sort_formats(formats) + + actual_lrc_lyrics = ''.join( + line + '\n' for line in re.findall( + r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])', lrc_content)) + + info_dict = { + 'id': mid, + 'formats': formats, + 'title': song_name, + 'release_date': publish_time, + 'creator': singer, + 'description': lrc_content, + 'thumbnail': thumbnail_url + } + if actual_lrc_lyrics: + info_dict['subtitles'] = { + 'origin': [{ + 'ext': 'lrc', + 'data': actual_lrc_lyrics, + }] + } + return info_dict + + +class QQPlaylistBaseIE(InfoExtractor): + @staticmethod + def qq_static_url(category, mid): + return 'http://y.qq.com/y/static/%s/%s/%s/%s.html' % (category, mid[-2], mid[-1], mid) + + def get_singer_all_songs(self, singmid, num): + return self._download_webpage( + r'https://c.y.qq.com/v8/fcg-bin/fcg_v8_singer_track_cp.fcg', singmid, + query={ + 'format': 'json', + 'inCharset': 'utf8', + 'outCharset': 'utf-8', + 'platform': 'yqq', + 'needNewCode': 0, + 'singermid': singmid, + 'order': 'listen', + 'begin': 0, + 'num': num, + 'songstatus': 1, + }) + + def get_entries_from_page(self, singmid): + entries = [] + + default_num = 1 + json_text = self.get_singer_all_songs(singmid, default_num) + json_obj_all_songs = self._parse_json(json_text, singmid) + + if json_obj_all_songs['code'] == 0: + total = json_obj_all_songs['data']['total'] + json_text = self.get_singer_all_songs(singmid, total) + json_obj_all_songs = self._parse_json(json_text, singmid) + + for item in json_obj_all_songs['data']['list']: + if item['musicData'].get('songmid') is not None: + songmid = item['musicData']['songmid'] + entries.append(self.url_result( + r'https://y.qq.com/n/yqq/song/%s.html' % songmid, 'QQMusic', songmid)) + + return entries + + +class QQMusicSingerIE(QQPlaylistBaseIE): + IE_NAME = 'qqmusic:singer' + IE_DESC = 'QQ音乐 - 歌手' + _VALID_URL = r'https?://y\.qq\.com/n/yqq/singer/(?P<id>[0-9A-Za-z]+)\.html' + _TEST = { + 'url': 'https://y.qq.com/n/yqq/singer/001BLpXF2DyJe2.html', + 'info_dict': { + 'id': '001BLpXF2DyJe2', + 'title': '林俊杰', + 'description': 'md5:870ec08f7d8547c29c93010899103751', + }, + 'playlist_mincount': 12, + } + + def _real_extract(self, url): + mid = self._match_id(url) + + entries = self.get_entries_from_page(mid) + singer_page = self._download_webpage(url, mid, 'Download singer page') + singer_name = self._html_search_regex( + r"singername\s*:\s*'(.*?)'", singer_page, 'singer name', default=None) + singer_desc = None + + if mid: + singer_desc_page = self._download_xml( + 
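When turning the scraped lyrics into an .lrc subtitle, the code above keeps only timestamped cue lines and bracketed tag lines. The same filter as a small function; the `or ''` guard is an addition for pages with no lyrics block (the original would pass None to re.findall in that case):

import re

LRC_LINE = r'(?m)^(\[[0-9]{2}:[0-9]{2}(?:\.[0-9]{2,})?\][^\n]*|\[[^\]]*\])'

def extract_lrc(lyrics):
    # keep '[mm:ss.xx] ...' cues and '[ti:...]'-style tags, drop plain text
    return ''.join(line + '\n' for line in re.findall(LRC_LINE, lyrics or ''))

# extract_lrc('[00:01.00] hello\nplain line\n[ti:title]') == '[00:01.00] hello\n[ti:title]\n'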
'http://s.plcloud.music.qq.com/fcgi-bin/fcg_get_singer_desc.fcg', mid,
+                'Download singer description XML',
+                query={'utf8': 1, 'outCharset': 'utf-8', 'format': 'xml', 'singermid': mid},
+                headers={'Referer': 'https://y.qq.com/n/yqq/singer/'})
+
+            singer_desc = singer_desc_page.find('./data/info/desc').text
+
+        return self.playlist_result(entries, mid, singer_name, singer_desc)
+
+
+class QQMusicAlbumIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:album'
+    IE_DESC = 'QQ音乐 - 专辑'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/album/(?P<id>[0-9A-Za-z]+)\.html'
+
+    _TESTS = [{
+        'url': 'https://y.qq.com/n/yqq/album/000gXCTb2AhRR1.html',
+        'info_dict': {
+            'id': '000gXCTb2AhRR1',
+            'title': '我们都是这样长大的',
+            'description': 'md5:179c5dce203a5931970d306aa9607ea6',
+        },
+        'playlist_count': 4,
+    }, {
+        'url': 'https://y.qq.com/n/yqq/album/002Y5a3b3AlCu3.html',
+        'info_dict': {
+            'id': '002Y5a3b3AlCu3',
+            'title': '그리고...',
+            'description': 'md5:a48823755615508a95080e81b51ba729',
+        },
+        'playlist_count': 8,
+    }]
+
+    def _real_extract(self, url):
+        mid = self._match_id(url)
+
+        album = self._download_json(
+            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid,
+            mid, 'Download album page')['data']
+
+        entries = [
+            self.url_result(
+                'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']
+            ) for song in album['list']
+        ]
+        album_name = album.get('name')
+        album_detail = album.get('desc')
+        if album_detail is not None:
+            album_detail = album_detail.strip()
+
+        return self.playlist_result(entries, mid, album_name, album_detail)
+
+
+class QQMusicToplistIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:toplist'
+    IE_DESC = 'QQ音乐 - 排行榜'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/toplist/(?P<id>[0-9]+)\.html'
+
+    _TESTS = [{
+        'url': 'https://y.qq.com/n/yqq/toplist/123.html',
+        'info_dict': {
+            'id': '123',
+            'title': '美国iTunes榜',
+            'description': 'md5:89db2335fdbb10678dee2d43fe9aba08',
+        },
+        'playlist_count': 100,
+    }, {
+        'url': 'https://y.qq.com/n/yqq/toplist/3.html',
+        'info_dict': {
+            'id': '3',
+            'title': '巅峰榜·欧美',
+            'description': 'md5:5a600d42c01696b26b71f8c4d43407da',
+        },
+        'playlist_count': 100,
+    }, {
+        'url': 'https://y.qq.com/n/yqq/toplist/106.html',
+        'info_dict': {
+            'id': '106',
+            'title': '韩国Mnet榜',
+            'description': 'md5:cb84b325215e1d21708c615cac82a6e7',
+        },
+        'playlist_count': 50,
+    }]
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+
+        toplist_json = self._download_json(
+            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id,
+            note='Download toplist page',
+            query={'type': 'toplist', 'topid': list_id, 'format': 'json'})
+
+        entries = [self.url_result(
+            'https://y.qq.com/n/yqq/song/' + song['data']['songmid'] + '.html', 'QQMusic',
+            song['data']['songmid'])
+            for song in toplist_json['songlist']]
+
+        topinfo = toplist_json.get('topinfo', {})
+        list_name = topinfo.get('ListName')
+        list_description = topinfo.get('info')
+        return self.playlist_result(entries, list_id, list_name, list_description)
+
+
+class QQMusicPlaylistIE(QQPlaylistBaseIE):
+    IE_NAME = 'qqmusic:playlist'
+    IE_DESC = 'QQ音乐 - 歌单'
+    _VALID_URL = r'https?://y\.qq\.com/n/yqq/playlist/(?P<id>[0-9]+)\.html'
+
+    _TESTS = [{
+        'url': 'http://y.qq.com/n/yqq/playlist/3462654915.html',
+        'info_dict': {
+            'id': '3462654915',
+            'title': '韩国5月新歌精选下旬',
+            'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4',
+        },
+        'playlist_count': 40,
+        'skip': 'playlist gone',
+    }, {
+        'url': 'https://y.qq.com/n/yqq/playlist/1374105607.html',
+        'info_dict': {
+            'id':
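Several of these QQ endpoints answer with JSONP rather than bare JSON, which is why the requests above and below pass transform_source=strip_jsonp. A rough sketch of what such a transform does (the real implementation is youtube_dl.utils.strip_jsonp and handles more callback shapes):

import json
import re

def strip_jsonp_sketch(code):
    # unwrap 'callback({...});' into the bare JSON payload
    return re.sub(r'(?s)^[\w.$]+\s*\(\s*(.*?)\s*\)\s*;?\s*$', r'\1', code)

# json.loads(strip_jsonp_sketch('cb({"code": 0});')) == {'code': 0}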
'1374105607', + 'title': '易入人心的华语民谣', + 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。', + }, + 'playlist_count': 20, + }] + + def _real_extract(self, url): + list_id = self._match_id(url) + + list_json = self._download_json( + 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', + list_id, 'Download list page', + query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, + transform_source=strip_jsonp) + if not len(list_json.get('cdlist', [])): + if list_json.get('code'): + raise ExtractorError( + 'QQ Music said: error %d in fetching playlist info' % list_json['code'], + expected=True) + raise ExtractorError('Unable to get playlist info') + + cdlist = list_json['cdlist'][0] + entries = [self.url_result( + 'https://y.qq.com/n/yqq/song/' + song['songmid'] + '.html', 'QQMusic', song['songmid']) + for song in cdlist['songlist']] + + list_name = cdlist.get('dissname') + list_description = clean_html(unescapeHTML(cdlist.get('desc'))) + return self.playlist_result(entries, list_id, list_name, list_description) diff --git a/youtube_dl/extractor/r7.py b/youtube_dl/extractor/r7.py new file mode 100644 index 000000000..e2202d603 --- /dev/null +++ b/youtube_dl/extractor/r7.py @@ -0,0 +1,112 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import int_or_none + + +class R7IE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| + noticias\.r7\.com(?:/[^/]+)+/[^/]+-| + player\.r7\.com/video/i/ + ) + (?P<id>[\da-f]{24}) + ''' + _TESTS = [{ + 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', + 'md5': '403c4e393617e8e8ddc748978ee8efde', + 'info_dict': { + 'id': '54e7050b0cf2ff57e0279389', + 'ext': 'mp4', + 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', + 'description': 'md5:01812008664be76a6479aa58ec865b72', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 98, + 'like_count': int, + 'view_count': int, + }, + }, { + 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', + 'only_matching': True, + }, { + 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', + 'only_matching': True, + }, { + 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'http://player-api.r7.com/video/i/%s' % video_id, video_id) + + title = video['title'] + + formats = [] + media_url_hls = video.get('media_url_hls') + if media_url_hls: + formats.extend(self._extract_m3u8_formats( + media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + media_url = video.get('media_url') + if media_url: + f = { + 'url': media_url, + 'format_id': 'http', + } + # 
m3u8 format always matches the http format, let's copy metadata from
+            # one to another
+            m3u8_formats = list(filter(
+                lambda f: f.get('vcodec') != 'none', formats))
+            if len(m3u8_formats) == 1:
+                f_copy = m3u8_formats[0].copy()
+                f_copy.update(f)
+                f_copy['protocol'] = 'http'
+                f = f_copy
+            formats.append(f)
+        self._sort_formats(formats)
+
+        description = video.get('description')
+        thumbnail = video.get('thumb')
+        duration = int_or_none(video.get('media_duration'))
+        like_count = int_or_none(video.get('likes'))
+        view_count = int_or_none(video.get('views'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+            'like_count': like_count,
+            'view_count': view_count,
+            'formats': formats,
+        }
+
+
+class R7ArticleIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015',
+        'only_matching': True,
+    }
+
+    @classmethod
+    def suitable(cls, url):
+        return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url)
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, display_id)
+
+        video_id = self._search_regex(
+            r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})',
+            webpage, 'video id')
+
+        return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key())
diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dl/extractor/radiobremen.py
new file mode 100644
index 000000000..2c35f9845
--- /dev/null
+++ b/youtube_dl/extractor/radiobremen.py
@@ -0,0 +1,63 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import parse_duration
+
+
+class RadioBremenIE(InfoExtractor):
+    # https?:// (the original 'http?://' made the 'p' optional instead of the 's')
+    _VALID_URL = r'https?://(?:www\.)?radiobremen\.de/mediathek/(?:index\.html)?\?id=(?P<id>[0-9]+)'
+    IE_NAME = 'radiobremen'
+
+    _TEST = {
+        'url': 'http://www.radiobremen.de/mediathek/?id=141876',
+        'info_dict': {
+            'id': '141876',
+            'ext': 'mp4',
+            'duration': 178,
+            'width': 512,
+            'title': 'Druck auf Patrick Öztürk',
+            'thumbnail': r're:https?://.*\.jpg$',
+            'description': 'Gegen den SPD-Bürgerschaftsabgeordneten Patrick Öztürk wird wegen Beihilfe zum gewerbsmäßigen Betrug ermittelt.
Am Donnerstagabend sollte er dem Vorstand des SPD-Unterbezirks Bremerhaven dazu Rede und Antwort stehen.', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + meta_url = 'http://www.radiobremen.de/apps/php/mediathek/metadaten.php?id=%s' % video_id + meta_doc = self._download_webpage( + meta_url, video_id, 'Downloading metadata') + title = self._html_search_regex( + r'<h1.*>(?P<title>.+)</h1>', meta_doc, 'title') + description = self._html_search_regex( + r'<p>(?P<description>.*)</p>', meta_doc, 'description', fatal=False) + duration = parse_duration(self._html_search_regex( + r'Länge:</td>\s+<td>(?P<duration>[0-9]+:[0-9]+)</td>', + meta_doc, 'duration', fatal=False)) + + page_doc = self._download_webpage( + url, video_id, 'Downloading video information') + mobj = re.search( + r"ardformatplayerclassic\(\'playerbereich\',\'(?P<width>[0-9]+)\',\'.*\',\'(?P<video_id>[0-9]+)\',\'(?P<secret>[0-9]+)\',\'(?P<thumbnail>.+)\',\'\'\)", + page_doc) + video_url = ( + "http://dl-ondemand.radiobremen.de/mediabase/%s/%s_%s_%s.mp4" % + (video_id, video_id, mobj.group("secret"), mobj.group('width'))) + + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'width': int(mobj.group('width')), + }] + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'formats': formats, + 'thumbnail': mobj.group('thumbnail'), + } diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py new file mode 100644 index 000000000..a28b1a24c --- /dev/null +++ b/youtube_dl/extractor/radiocanada.py @@ -0,0 +1,171 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + unified_strdate, +) + + +class RadioCanadaIE(InfoExtractor): + IE_NAME = 'radiocanada' + _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' + _TESTS = [ + { + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', + 'info_dict': { + 'id': '7184272', + 'ext': 'mp4', + 'title': 'Le parcours du tireur capté sur vidéo', + 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', + 'upload_date': '20141023', + }, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, + { + # empty Title + 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', + 'info_dict': { + 'id': '7754998', + 'ext': 'mp4', + 'title': 'letelejournal22h', + 'description': 'INTEGRALE WEB 22H-TJ', + 'upload_date': '20170720', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + # with protectionType but not actually DRM protected + 'url': 'radiocanada:toutv:140872', + 'info_dict': { + 'id': '140872', + 'title': 'Épisode 1', + 'series': 'District 31', + }, + 'only_matching': True, + } + ] + _GEO_COUNTRIES = ['CA'] + _access_token = None + _claims = None + + def _call_api(self, path, video_id=None, app_code=None, query=None): + if not query: + query = {} + query.update({ + 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb', + 'output': 'json', + }) + if video_id: + query.update({ + 'appCode': app_code, + 'idMedia': video_id, + }) + if self._access_token: + query['access_token'] = self._access_token + try: + return self._download_json( + 'https://services.radio-canada.ca/media/' + path, video_id, query=query) + except ExtractorError as e: + 
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
+                data = self._parse_json(e.cause.read().decode(), None)
+                error = data.get('error_description') or data['errorMessage']['text']
+                raise ExtractorError(error, expected=True)
+            raise
+
+    def _extract_info(self, app_code, video_id):
+        metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']
+
+        def get_meta(name):
+            for meta in metas:
+                if meta.get('name') == name:
+                    text = meta.get('text')
+                    if text:
+                        return text
+
+        # protectionType does not necessarily mean the video is DRM protected (see
+        # https://github.com/ytdl-org/youtube-dl/pull/18609).
+        if get_meta('protectionType'):
+            self.report_warning('This video is probably DRM protected.')
+
+        query = {
+            'connectionType': 'hd',
+            'deviceType': 'ipad',
+            'multibitrate': 'true',
+        }
+        if self._claims:
+            query['claims'] = self._claims
+        v_data = self._call_api('validation/v2/', video_id, app_code, query)
+        v_url = v_data.get('url')
+        if not v_url:
+            error = v_data['message']
+            if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
+                self.raise_geo_restricted(error, self._GEO_COUNTRIES)
+            if error == 'Le contenu sélectionné est disponible seulement en premium':
+                self.raise_login_required(error)
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, error), expected=True)
+        formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
+        self._sort_formats(formats)
+
+        subtitles = {}
+        closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
+        if closed_caption_url:
+            subtitles['fr'] = [{
+                'url': closed_caption_url,
+                'ext': determine_ext(closed_caption_url, 'vtt'),
+            }]
+
+        return {
+            'id': video_id,
+            'title': get_meta('Title') or get_meta('AV-nomEmission'),
+            'description': get_meta('Description') or get_meta('ShortDescription'),
+            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
+            'duration': int_or_none(get_meta('length')),
+            'series': get_meta('Emission'),
+            # read the values via get_meta; the bare key names would never parse as ints
+            'season_number': int_or_none(get_meta('SrcSaison')),
+            'episode_number': int_or_none(get_meta('SrcEpisode')),
+            'upload_date': unified_strdate(get_meta('Date')),
+            'subtitles': subtitles,
+            'formats': formats,
+        }
+
+    def _real_extract(self, url):
+        return self._extract_info(*re.match(self._VALID_URL, url).groups())
+
+
+class RadioCanadaAudioVideoIE(InfoExtractor):
+    IE_NAME = 'radiocanada:audiovideo'
+    _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
+        'info_dict': {
+            'id': '7527184',
+            'ext': 'mp4',
+            'title': 'Barack Obama au Vietnam',
+            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
+            'upload_date': '20160523',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
diff --git a/youtube_dl/extractor/radiode.py b/youtube_dl/extractor/radiode.py
new file mode 100644
index 000000000..2c06c8b1e
--- /dev/null
+++ b/youtube_dl/extractor/radiode.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RadioDeIE(InfoExtractor):
+    IE_NAME = 'radio.de'
+    _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)'
+    _TEST = {
+        'url':
'http://ndr2.radio.de/', + 'info_dict': { + 'id': 'ndr2', + 'ext': 'mp3', + 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:591c49c702db1a33751625ebfb67f273', + 'thumbnail': r're:^https?://.*\.png', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + radio_id = self._match_id(url) + webpage = self._download_webpage(url, radio_id) + jscode = self._search_regex( + r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n", + webpage, 'broadcast') + + broadcast = self._parse_json(jscode, radio_id) + title = self._live_title(broadcast['name']) + description = broadcast.get('description') or broadcast.get('shortDescription') + thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100') + + formats = [{ + 'url': stream['streamUrl'], + 'ext': stream['streamContentFormat'].lower(), + 'acodec': stream['streamContentFormat'], + 'abr': stream['bitRate'], + 'asr': stream['sampleRate'] + } for stream in broadcast['streamUrls']] + self._sort_formats(formats) + + return { + 'id': radio_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'is_live': True, + 'formats': formats, + } diff --git a/youtube_dl/extractor/radiofrance.py b/youtube_dl/extractor/radiofrance.py new file mode 100644 index 000000000..a8afc0014 --- /dev/null +++ b/youtube_dl/extractor/radiofrance.py @@ -0,0 +1,59 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class RadioFranceIE(InfoExtractor): + _VALID_URL = r'^https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)' + IE_NAME = 'radiofrance' + + _TEST = { + 'url': 'http://maison.radiofrance.fr/radiovisions/one-one', + 'md5': 'bdbb28ace95ed0e04faab32ba3160daf', + 'info_dict': { + 'id': 'one-one', + 'ext': 'ogg', + 'title': 'One to one', + 'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.", + 'uploader': 'Thomas Hercouët', + }, + } + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + video_id = m.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') + description = self._html_search_regex( + r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>', + webpage, 'description', fatal=False) + uploader = self._html_search_regex( + r'<div class="credit">  © (.*?)</div>', + webpage, 'uploader', fatal=False) + + formats_str = self._html_search_regex( + r'class="jp-jplayer[^"]*" data-source="([^"]+)">', + webpage, 'audio URLs') + formats = [ + { + 'format_id': fm[0], + 'url': fm[1], + 'vcodec': 'none', + 'preference': i, + } + for i, fm in + enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)) + ] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + } diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py new file mode 100644 index 000000000..3f74f0c01 --- /dev/null +++ b/youtube_dl/extractor/radiojavan.py @@ -0,0 +1,83 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_resolution, + str_to_int, + 
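RadioDeIE above maps the station's streamUrls entries directly onto format dicts; the translation step on its own (the `or []` guard is a defensive addition, the extractor indexes the key directly):

def stream_formats(broadcast):
    # one youtube-dl format per streamUrls entry, as in RadioDeIE._real_extract
    return [{
        'url': stream['streamUrl'],
        'ext': stream['streamContentFormat'].lower(),
        'acodec': stream['streamContentFormat'],
        'abr': stream['bitRate'],
        'asr': stream['sampleRate'],
    } for stream in broadcast.get('streamUrls') or []]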
unified_strdate, + urlencode_postdata, + urljoin, +) + + +class RadioJavanIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' + _TEST = { + 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', + 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', + 'info_dict': { + 'id': 'chaartaar-ashoobam', + 'ext': 'mp4', + 'title': 'Chaartaar - Ashoobam', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'upload_date': '20150215', + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + download_host = self._download_json( + 'https://www.radiojavan.com/videos/video_host', video_id, + data=urlencode_postdata({'id': video_id}), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': url, + }).get('host', 'https://host1.rjmusicmedia.com') + + webpage = self._download_webpage(url, video_id) + + formats = [] + for format_id, _, video_path in re.findall( + r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', + webpage): + f = parse_resolution(format_id) + f.update({ + 'url': urljoin(download_host, video_path), + 'format_id': format_id, + }) + formats.append(f) + self._sort_formats(formats) + + title = self._og_search_title(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + upload_date = unified_strdate(self._search_regex( + r'class="date_added">Date added: ([^<]+)<', + webpage, 'upload date', fatal=False)) + + view_count = str_to_int(self._search_regex( + r'class="views">Plays: ([\d,]+)', + webpage, 'view count', fatal=False)) + like_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) likes', + webpage, 'like count', fatal=False)) + dislike_count = str_to_int(self._search_regex( + r'class="rating">([\d,]+) dislikes', + webpage, 'dislike count', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'view_count': view_count, + 'like_count': like_count, + 'dislike_count': dislike_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py new file mode 100644 index 000000000..207a6c247 --- /dev/null +++ b/youtube_dl/extractor/rai.py @@ -0,0 +1,502 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_urlparse, + compat_str, +) +from ..utils import ( + ExtractorError, + determine_ext, + find_xpath_attr, + fix_xml_ampersands, + GeoRestrictedError, + int_or_none, + parse_duration, + strip_or_none, + try_get, + unescapeHTML, + unified_strdate, + unified_timestamp, + update_url_query, + urljoin, + xpath_text, +) + + +class RaiBaseIE(InfoExtractor): + _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' + _GEO_COUNTRIES = ['IT'] + _GEO_BYPASS = False + + def _extract_relinker_info(self, relinker_url, video_id): + if not re.match(r'https?://', relinker_url): + return {'formats': [{'url': relinker_url}]} + + formats = [] + geoprotection = None + is_live = None + duration = None + + for platform in ('mon', 'flash', 'native'): + relinker = self._download_xml( + relinker_url, video_id, + note='Downloading XML metadata for platform %s' % platform, + transform_source=fix_xml_ampersands, + query={'output': 45, 'pl': platform}, + headers=self.geo_verification_headers()) + + if not geoprotection: + geoprotection = xpath_text( + relinker, './geoprotection', default=None) == 'Y' + + if not is_live: + is_live = xpath_text( + 
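The relinker loop that _extract_relinker_info is building here probes the returned XML field by field with xpath_text and find_xpath_attr. A compact sketch of that probing (the sample document below is made up):

import xml.etree.ElementTree as etree

sample = etree.fromstring(
    '<video><geoprotection>Y</geoprotection><is_live>N</is_live>'
    '<url type="content">http://example.invalid/master.m3u8</url></video>')

def xpath_text_sketch(node, path, default=None):
    # like youtube_dl.utils.xpath_text: text of the first match, else default
    el = node.find(path)
    return el.text if el is not None and el.text else default

url_elem = next(
    (el for el in sample.findall('./url') if el.get('type') == 'content'), None)

assert xpath_text_sketch(sample, './geoprotection') == 'Y'
assert url_elem is not None and url_elem.text.endswith('.m3u8')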
relinker, './is_live', default=None) == 'Y' + if not duration: + duration = parse_duration(xpath_text( + relinker, './duration', default=None)) + + url_elem = find_xpath_attr(relinker, './url', 'type', 'content') + if url_elem is None: + continue + + media_url = url_elem.text + + # This does not imply geo restriction (e.g. + # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) + if media_url == 'http://download.rai.it/video_no_available.mp4': + continue + + ext = determine_ext(media_url) + if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): + continue + + if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon': + formats.extend(self._extract_m3u8_formats( + media_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'f4m' or platform == 'flash': + manifest_url = update_url_query( + media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), + {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) + formats.extend(self._extract_f4m_formats( + manifest_url, video_id, f4m_id='hds', fatal=False)) + else: + bitrate = int_or_none(xpath_text(relinker, 'bitrate')) + formats.append({ + 'url': media_url, + 'tbr': bitrate if bitrate > 0 else None, + 'format_id': 'http-%d' % bitrate if bitrate > 0 else 'http', + }) + + if not formats and geoprotection is True: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + + return dict((k, v) for k, v in { + 'is_live': is_live, + 'duration': duration, + 'formats': formats, + }.items() if v is not None) + + @staticmethod + def _extract_subtitles(url, subtitle_url): + subtitles = {} + if subtitle_url and isinstance(subtitle_url, compat_str): + subtitle_url = urljoin(url, subtitle_url) + STL_EXT = '.stl' + SRT_EXT = '.srt' + subtitles['it'] = [{ + 'ext': 'stl', + 'url': subtitle_url, + }] + if subtitle_url.endswith(STL_EXT): + srt_url = subtitle_url[:-len(STL_EXT)] + SRT_EXT + subtitles['it'].append({ + 'ext': 'srt', + 'url': srt_url, + }) + return subtitles + + +class RaiPlayIE(RaiBaseIE): + _VALID_URL = r'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE._UUID_RE + _TESTS = [{ + 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', + 'md5': '340aa3b7afb54bfd14a8c11786450d76', + 'info_dict': { + 'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', + 'ext': 'mp4', + 'title': 'La Casa Bianca', + 'alt_title': 'S2016 - Puntata del 23/10/2016', + 'description': 'md5:a09d45890850458077d1f68bb036e0a5', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Rai 3', + 'creator': 'Rai 3', + 'duration': 3278, + 'timestamp': 1477764300, + 'upload_date': '20161029', + 'series': 'La Casa Bianca', + 'season': '2016', + }, + }, { + 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', + 'md5': '8970abf8caf8aef4696e7b1f2adfc696', + 'info_dict': { + 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', + 'ext': 'mp4', + 'title': 'Report del 07/04/2014', + 'alt_title': 'S2013/14 - Puntata del 07/04/2014', + 'description': 'md5:f27c544694cacb46a078db84ec35d2d9', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'Rai 5', + 'creator': 'Rai 5', + 'duration': 6160, + 'series': 'Report', + 'season_number': 5, + 'season': '2013/14', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', + 'only_matching': True, + }] + + def 
_real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + url, video_id = mobj.group('url', 'id') + + media = self._download_json( + '%s?json' % url, video_id, 'Downloading video JSON') + + title = media['name'] + + video = media['video'] + + relinker_info = self._extract_relinker_info(video['contentUrl'], video_id) + self._sort_formats(relinker_info['formats']) + + thumbnails = [] + if 'images' in media: + for _, value in media.get('images').items(): + if value: + thumbnails.append({ + 'url': value.replace('[RESOLUTION]', '600x400') + }) + + timestamp = unified_timestamp(try_get( + media, lambda x: x['availabilities'][0]['start'], compat_str)) + + subtitles = self._extract_subtitles(url, video.get('subtitles')) + + info = { + 'id': video_id, + 'title': self._live_title(title) if relinker_info.get( + 'is_live') else title, + 'alt_title': media.get('subtitle'), + 'description': media.get('description'), + 'uploader': strip_or_none(media.get('channel')), + 'creator': strip_or_none(media.get('editor')), + 'duration': parse_duration(video.get('duration')), + 'timestamp': timestamp, + 'thumbnails': thumbnails, + 'series': try_get( + media, lambda x: x['isPartOf']['name'], compat_str), + 'season_number': int_or_none(try_get( + media, lambda x: x['isPartOf']['numeroStagioni'])), + 'season': media.get('stagione') or None, + 'subtitles': subtitles, + } + + info.update(relinker_info) + return info + + +class RaiPlayLiveIE(RaiBaseIE): + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'http://www.raiplay.it/dirette/rainews24', + 'info_dict': { + 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', + 'display_id': 'rainews24', + 'ext': 'mp4', + 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'md5:6eca31500550f9376819f174e5644754', + 'uploader': 'Rai News 24', + 'creator': 'Rai News 24', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + r'data-uniquename=["\']ContentItem-(%s)' % RaiBaseIE._UUID_RE, + webpage, 'content id') + + return { + '_type': 'url_transparent', + 'ie_key': RaiPlayIE.ie_key(), + 'url': 'http://www.raiplay.it/dirette/ContentItem-%s.html' % video_id, + 'id': video_id, + 'display_id': display_id, + } + + +class RaiPlayPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', + 'info_dict': { + 'id': 'nondirloalmiocapo', + 'title': 'Non dirlo al mio capo', + 'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', + }, + 'playlist_mincount': 12, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + title = self._html_search_meta( + ('programma', 'nomeProgramma'), webpage, 'title') + description = unescapeHTML(self._html_search_meta( + ('description', 'og:description'), webpage, 'description')) + + entries = [] + for mobj in re.finditer( + r'<a\b[^>]+\bhref=(["\'])(?P<path>/raiplay/video/.+?)\1', + webpage): + video_url = urljoin(url, mobj.group('path')) + entries.append(self.url_result( + video_url, ie=RaiPlayIE.ie_key(), + video_id=RaiPlayIE._match_id(video_url))) + + return self.playlist_result(entries, playlist_id, title, description) + + +class RaiIE(RaiBaseIE): + _VALID_URL = 
r'https?://[^/]+\.(?:rai\.(?:it|tv)|rainews\.it)/.+?-(?P<id>%s)(?:-.+?)?\.html' % RaiBaseIE._UUID_RE + _TESTS = [{ + # var uniquename = "ContentItem-..." + # data-id="ContentItem-..." + 'url': 'http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', + 'info_dict': { + 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', + 'ext': 'mp4', + 'title': 'TG PRIMO TEMPO', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1758, + 'upload_date': '20140612', + } + }, { + # with ContentItem in many metas + 'url': 'http://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', + 'info_dict': { + 'id': '1632c009-c843-4836-bb65-80c33084a64b', + 'ext': 'mp4', + 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"', + 'description': 'I film in uscita questa settimana.', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 833, + 'upload_date': '20161103', + } + }, { + # with ContentItem in og:url + 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', + 'md5': '11959b4e44fa74de47011b5799490adf', + 'info_dict': { + 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', + 'ext': 'mp4', + 'title': 'TG1 ore 20:00 del 03/11/2016', + 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2214, + 'upload_date': '20161103', + } + }, { + # drawMediaRaiTV(...) + 'url': 'http://www.report.rai.it/dl/Report/puntata/ContentItem-0c7a664b-d0f4-4b2c-8835-3f82e46f433e.html', + 'md5': '2dd727e61114e1ee9c47f0da6914e178', + 'info_dict': { + 'id': '59d69d28-6bb6-409d-a4b5-ed44096560af', + 'ext': 'mp4', + 'title': 'Il pacco', + 'description': 'md5:4b1afae1364115ce5d78ed83cd2e5b3a', + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20141221', + }, + }, { + # initEdizione('ContentItem-...' 
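Whichever page variant is in play, the media is identified by a 'ContentItem-<uuid>' token (in og:url, data-id, initEdizione(...) and so on, as these tests illustrate). Pulling it out of any such string is one regex against the shared UUID pattern:

import re

UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'

def content_item_id(value):
    # 'ContentItem-04a9f4bd-b563-40cf-82a6-aad3529cb4a9' -> the bare uuid
    mobj = re.search(r'ContentItem-(%s)' % UUID_RE, value or '')
    return mobj.group(1) if mobj else None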
+ 'url': 'http://www.tg1.rai.it/dl/tg1/2010/edizioni/ContentSet-9b6e0cba-4bef-4aef-8cf0-9f7f665b7dfb-tg1.html?item=undefined', + 'info_dict': { + 'id': 'c2187016-8484-4e3a-8ac8-35e475b07303', + 'ext': 'mp4', + 'title': r're:TG1 ore \d{2}:\d{2} del \d{2}/\d{2}/\d{4}', + 'duration': 2274, + 'upload_date': '20170401', + }, + 'skip': 'Changes daily', + }, { + # HDS live stream with only relinker URL + 'url': 'http://www.rai.tv/dl/RaiTV/dirette/PublishingBlock-1912dbbf-3f96-44c3-b4cf-523681fbacbc.html?channel=EuroNews', + 'info_dict': { + 'id': '1912dbbf-3f96-44c3-b4cf-523681fbacbc', + 'ext': 'flv', + 'title': 'EuroNews', + }, + 'params': { + 'skip_download': True, + }, + }, { + # HLS live stream with ContentItem in og:url + 'url': 'http://www.rainews.it/dl/rainews/live/ContentItem-3156f2f2-dc70-4953-8e2f-70d7489d4ce9.html', + 'info_dict': { + 'id': '3156f2f2-dc70-4953-8e2f-70d7489d4ce9', + 'ext': 'mp4', + 'title': 'La diretta di Rainews24', + }, + 'params': { + 'skip_download': True, + }, + }, { + # Direct MMS URL + 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', + 'only_matching': True, + }, { + 'url': 'https://www.rainews.it/tgr/marche/notiziari/video/2019/02/ContentItem-6ba945a2-889c-4a80-bdeb-8489c70a8db9.html', + 'only_matching': True, + }] + + def _extract_from_content_id(self, content_id, url): + media = self._download_json( + 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-%s.html?json' % content_id, + content_id, 'Downloading video JSON') + + title = media['name'].strip() + + media_type = media['type'] + if 'Audio' in media_type: + relinker_info = { + 'formats': [{ + 'format_id': media.get('formatoAudio'), + 'url': media['audioUrl'], + 'ext': media.get('formatoAudio'), + }] + } + elif 'Video' in media_type: + relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) + else: + raise ExtractorError('not a media file') + + self._sort_formats(relinker_info['formats']) + + thumbnails = [] + for image_type in ('image', 'image_medium', 'image_300'): + thumbnail_url = media.get(image_type) + if thumbnail_url: + thumbnails.append({ + 'url': compat_urlparse.urljoin(url, thumbnail_url), + }) + + subtitles = self._extract_subtitles(url, media.get('subtitlesUrl')) + + info = { + 'id': content_id, + 'title': title, + 'description': strip_or_none(media.get('desc')), + 'thumbnails': thumbnails, + 'uploader': media.get('author'), + 'upload_date': unified_strdate(media.get('date')), + 'duration': parse_duration(media.get('length')), + 'subtitles': subtitles, + } + + info.update(relinker_info) + + return info + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + content_item_id = None + + content_item_url = self._html_search_meta( + ('og:url', 'og:video', 'og:video:secure_url', 'twitter:url', + 'twitter:player', 'jsonlink'), webpage, default=None) + if content_item_url: + content_item_id = self._search_regex( + r'ContentItem-(%s)' % self._UUID_RE, content_item_url, + 'content item id', default=None) + + if not content_item_id: + content_item_id = self._search_regex( + r'''(?x) + (?: + (?:initEdizione|drawMediaRaiTV)\(| + <(?:[^>]+\bdata-id|var\s+uniquename)= + ) + (["\']) + (?:(?!\1).)*\bContentItem-(?P<id>%s) + ''' % self._UUID_RE, + webpage, 'content item id', default=None, group='id') + + content_item_ids = set() + if content_item_id: + content_item_ids.add(content_item_id) + if video_id not in content_item_ids: + content_item_ids.add(video_id) + + 
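+ # Try each candidate ContentItem id in turn: a geo restriction aborts
+ # extraction immediately, while any other error falls through to the
+ # next candidate and finally to the relinker URL embedded in the page.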
for content_item_id in content_item_ids: + try: + return self._extract_from_content_id(content_item_id, url) + except GeoRestrictedError: + raise + except ExtractorError: + pass + + relinker_url = self._search_regex( + r'''(?x) + (?: + var\s+videoURL| + mediaInfo\.mediaUri + )\s*=\s* + ([\'"]) + (?P<url> + (?:https?:)? + //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? + (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 + ''', + webpage, 'relinker URL', group='url') + + relinker_info = self._extract_relinker_info( + urljoin(url, relinker_url), video_id) + self._sort_formats(relinker_info['formats']) + + title = self._search_regex( + r'var\s+videoTitolo\s*=\s*([\'"])(?P<title>[^\'"]+)\1', + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) + + info = { + 'id': video_id, + 'title': title, + } + + info.update(relinker_info) + + return info diff --git a/youtube_dl/extractor/raywenderlich.py b/youtube_dl/extractor/raywenderlich.py new file mode 100644 index 000000000..5411ece21 --- /dev/null +++ b/youtube_dl/extractor/raywenderlich.py @@ -0,0 +1,179 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .vimeo import VimeoIE +from ..compat import compat_str +from ..utils import ( + ExtractorError, + int_or_none, + merge_dicts, + try_get, + unescapeHTML, + unified_timestamp, + urljoin, +) + + +class RayWenderlichIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + videos\.raywenderlich\.com/courses| + (?:www\.)?raywenderlich\.com + )/ + (?P<course_id>[^/]+)/lessons/(?P<id>\d+) + ''' + + _TESTS = [{ + 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1', + 'info_dict': { + 'id': '248377018', + 'ext': 'mp4', + 'title': 'Introduction', + 'description': 'md5:804d031b3efa9fcb49777d512d74f722', + 'timestamp': 1513906277, + 'upload_date': '20171222', + 'duration': 133, + 'uploader': 'Ray Wenderlich', + 'uploader_id': 'user3304672', + }, + 'params': { + 'noplaylist': True, + 'skip_download': True, + }, + 'add_ie': [VimeoIE.ie_key()], + 'expected_warnings': ['HTTP Error 403: Forbidden'], + }, { + 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', + 'only_matching': True, + }] + + @staticmethod + def _extract_video_id(data, lesson_id): + if not data: + return + groups = try_get(data, lambda x: x['groups'], list) or [] + if not groups: + return + for group in groups: + if not isinstance(group, dict): + continue + contents = try_get(data, lambda x: x['contents'], list) or [] + for content in contents: + if not isinstance(content, dict): + continue + ordinal = int_or_none(content.get('ordinal')) + if ordinal != lesson_id: + continue + video_id = content.get('identifier') + if video_id: + return compat_str(video_id) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + course_id, lesson_id = mobj.group('course_id', 'id') + display_id = '%s/%s' % (course_id, lesson_id) + + webpage = self._download_webpage(url, display_id) + + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._html_search_meta( + 'twitter:image', webpage, 'thumbnail') + + if '>Subscribe to unlock' in webpage: + raise ExtractorError( + 'This content is only available for subscribers', + expected=True) + + info = { + 'thumbnail': thumbnail, + } + + vimeo_id = self._search_regex( + r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None) + + if not vimeo_id: + data = self._parse_json( + self._search_regex( + r'data-collection=(["\'])(?P<data>{.+?})\1', webpage, + 'data 
collection', default='{}', group='data'), + display_id, transform_source=unescapeHTML, fatal=False) + video_id = self._extract_video_id( + data, lesson_id) or self._search_regex( + r'/videos/(\d+)/', thumbnail, 'video id') + headers = { + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest', + } + csrf_token = self._html_search_meta( + 'csrf-token', webpage, 'csrf token', default=None) + if csrf_token: + headers['X-CSRF-Token'] = csrf_token + video = self._download_json( + 'https://videos.raywenderlich.com/api/v1/videos/%s.json' + % video_id, display_id, headers=headers)['video'] + vimeo_id = video['clips'][0]['provider_id'] + info.update({ + '_type': 'url_transparent', + 'title': video.get('name'), + 'description': video.get('description') or video.get( + 'meta_description'), + 'duration': int_or_none(video.get('duration')), + 'timestamp': unified_timestamp(video.get('created_at')), + }) + + return merge_dicts(info, self.url_result( + VimeoIE._smuggle_referrer( + 'https://player.vimeo.com/video/%s' % vimeo_id, url), + ie=VimeoIE.ie_key(), video_id=vimeo_id)) + + +class RayWenderlichCourseIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + (?: + videos\.raywenderlich\.com/courses| + (?:www\.)?raywenderlich\.com + )/ + (?P<id>[^/]+) + ''' + + _TEST = { + 'url': 'https://www.raywenderlich.com/3530-testing-in-ios', + 'info_dict': { + 'title': 'Testing in iOS', + 'id': '3530-testing-in-ios', + }, + 'params': { + 'noplaylist': False, + }, + 'playlist_count': 29, + } + + @classmethod + def suitable(cls, url): + return False if RayWenderlichIE.suitable(url) else super( + RayWenderlichCourseIE, cls).suitable(url) + + def _real_extract(self, url): + course_id = self._match_id(url) + + webpage = self._download_webpage(url, course_id) + + entries = [] + lesson_urls = set() + for lesson_url in re.findall( + r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage): + if lesson_url in lesson_urls: + continue + lesson_urls.add(lesson_url) + entries.append(self.url_result( + urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key())) + + title = self._og_search_title( + webpage, default=None) or self._html_search_meta( + 'twitter:title', webpage, 'title', default=None) + + return self.playlist_result(entries, course_id, title) diff --git a/youtube_dl/extractor/rbmaradio.py b/youtube_dl/extractor/rbmaradio.py new file mode 100644 index 000000000..ae7413fb5 --- /dev/null +++ b/youtube_dl/extractor/rbmaradio.py @@ -0,0 +1,72 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + clean_html, + int_or_none, + unified_timestamp, + update_url_query, +) + + +class RBMARadioIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:rbmaradio|redbullradio)\.com/shows/(?P<show_id>[^/]+)/episodes/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://www.rbmaradio.com/shows/main-stage/episodes/ford-lopatin-live-at-primavera-sound-2011', + 'md5': '6bc6f9bcb18994b4c983bc3bf4384d95', + 'info_dict': { + 'id': 'ford-lopatin-live-at-primavera-sound-2011', + 'ext': 'mp3', + 'title': 'Main Stage - Ford & Lopatin at Primavera Sound', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 2452, + 'timestamp': 1307103164, + 'upload_date': '20110603', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + show_id = mobj.group('show_id') + episode_id = mobj.group('id') + + webpage = self._download_webpage(url, episode_id) + + episode = 
self._parse_json( + self._search_regex( + r'__INITIAL_STATE__\s*=\s*({.+?})\s*</script>', + webpage, 'json data'), + episode_id)['episodes'][show_id][episode_id] + + title = episode['title'] + + show_title = episode.get('showTitle') + if show_title: + title = '%s - %s' % (show_title, title) + + formats = [{ + 'url': update_url_query(episode['audioURL'], query={'cbr': abr}), + 'format_id': compat_str(abr), + 'abr': abr, + 'vcodec': 'none', + } for abr in (96, 128, 192, 256)] + self._check_formats(formats, episode_id) + + description = clean_html(episode.get('longTeaser')) + thumbnail = self._proto_relative_url(episode.get('imageURL', {}).get('landscape')) + duration = int_or_none(episode.get('duration')) + timestamp = unified_timestamp(episode.get('publishedAt')) + + return { + 'id': episode_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/youtube_dl/extractor/rds.py b/youtube_dl/extractor/rds.py new file mode 100644 index 000000000..8c016a77d --- /dev/null +++ b/youtube_dl/extractor/rds.py @@ -0,0 +1,70 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, + js_to_json, +) +from ..compat import compat_str + + +class RDSIE(InfoExtractor): + IE_DESC = 'RDS.ca' + _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+' + + _TESTS = [{ + 'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799', + 'info_dict': { + 'id': '604333', + 'display_id': 'fowler-jr-prend-la-direction-de-jacksonville', + 'ext': 'flv', + 'title': 'Fowler Jr. prend la direction de Jacksonville', + 'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. 
', + 'timestamp': 1430397346, + 'upload_date': '20150430', + 'duration': 154.354, + 'age_limit': 0, + } + }, { + 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) + video_id = compat_str(item['id']) + title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( + 'title', webpage, 'title', fatal=True) + description = self._og_search_description(webpage) or self._html_search_meta( + 'description', webpage, 'description') + thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( + [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', + r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], + webpage, 'thumbnail', fatal=False) + timestamp = parse_iso8601(self._search_regex( + r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"', + webpage, 'upload date', fatal=False)) + duration = parse_duration(self._search_regex( + r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"', + webpage, 'duration', fatal=False)) + age_limit = self._family_friendly_search(webpage) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'display_id': display_id, + 'url': '9c9media:rds_web:%s' % video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'age_limit': age_limit, + 'ie_key': 'NineCNineMedia', + } diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dl/extractor/redbulltv.py new file mode 100644 index 000000000..dbe1aaded --- /dev/null +++ b/youtube_dl/extractor/redbulltv.py @@ -0,0 +1,128 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + float_or_none, + ExtractorError, +) + + +class RedBullTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live)/(?P<id>AP-\w+)' + _TESTS = [{ + # film + 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', + 'md5': 'fb0445b98aa4394e504b413d98031d1f', + 'info_dict': { + 'id': 'AP-1Q6XCDTAN1W11', + 'ext': 'mp4', + 'title': 'ABC of... WRC - ABC of... 
S1E6', + 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', + 'duration': 1582.04, + }, + }, { + # episode + 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11', + 'info_dict': { + 'id': 'AP-1PMHKJFCW1W11', + 'ext': 'mp4', + 'title': 'Grime - Hashtags S2E4', + 'description': 'md5:b5f522b89b72e1e23216e5018810bb25', + 'duration': 904.6, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + session = self._download_json( + 'https://api.redbull.tv/v3/session', video_id, + note='Downloading access token', query={ + 'category': 'personal_computer', + 'os_family': 'http', + }) + if session.get('code') == 'error': + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, session['message'])) + token = session['token'] + + try: + video = self._download_json( + 'https://api.redbull.tv/v3/products/' + video_id, + video_id, note='Downloading video information', + headers={'Authorization': token} + ) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: + error_message = self._parse_json( + e.cause.read().decode(), video_id)['error'] + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error_message), expected=True) + raise + + title = video['title'].strip() + + formats = self._extract_m3u8_formats( + 'https://dms.redbull.tv/v3/%s/%s/playlist.m3u8' % (video_id, token), + video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + subtitles = {} + for resource in video.get('resources', []): + if resource.startswith('closed_caption_'): + splitted_resource = resource.split('_') + if splitted_resource[2]: + subtitles.setdefault('en', []).append({ + 'url': 'https://resources.redbull.tv/%s/%s' % (video_id, resource), + 'ext': splitted_resource[2], + }) + + subheading = video.get('subheading') + if subheading: + title += ' - %s' % subheading + + return { + 'id': video_id, + 'title': title, + 'description': video.get('long_description') or video.get( + 'short_description'), + 'duration': float_or_none(video.get('duration'), scale=1000), + 'formats': formats, + 'subtitles': subtitles, + } + + +class RedBullTVRrnContentIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)/(?:video|live)/rrn:content:[^:]+:(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' + _TESTS = [{ + 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william', + 'only_matching': True, + }, { + 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_url = self._og_search_url(webpage) + + return self.url_result( + video_url, ie=RedBullTVIE.ie_key(), + video_id=RedBullTVIE._match_id(video_url)) diff --git a/youtube_dl/extractor/reddit.py b/youtube_dl/extractor/reddit.py 
new file mode 100644 index 000000000..663f622b3 --- /dev/null +++ b/youtube_dl/extractor/reddit.py @@ -0,0 +1,130 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + float_or_none, + url_or_none, +) + + +class RedditIE(InfoExtractor): + _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)' + _TEST = { + # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ + 'url': 'https://v.redd.it/zv89llsvexdz', + 'md5': '0a070c53eba7ec4534d95a5a1259e253', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'title': 'zv89llsvexdz', + }, + 'params': { + 'format': 'bestvideo', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + formats = self._extract_m3u8_formats( + 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, + 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + + formats.extend(self._extract_mpd_formats( + 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, + mpd_id='dash', fatal=False)) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_id, + 'formats': formats, + } + + +class RedditRIE(InfoExtractor): + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))' + _TESTS = [{ + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'title': 'That small heart attack.', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1501941939, + 'upload_date': '20170805', + 'uploader': 'Antw87', + 'like_count': int, + 'dislike_count': int, + 'comment_count': int, + 'age_limit': 0, + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }, { + 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', + 'only_matching': True, + }, { + # imgur + 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, + }, { + # imgur @ old reddit + 'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', + 'only_matching': True, + }, { + # streamable + 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', + 'only_matching': True, + }, { + # youtube + 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', + 'only_matching': True, + }, { + # reddit video @ nm reddit + 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + url, video_id = mobj.group('url', 'id') + + video_id = self._match_id(url) + + data = self._download_json( + url + '/.json', video_id)[0]['data']['children'][0]['data'] + + video_url = data['url'] + + # Avoid recursing into the same reddit URL + if 'reddit.com/' in video_url and '/%s/' % video_id in video_url: + raise ExtractorError('No media found', expected=True) + + over_18 = data.get('over_18') + if over_18 is True: + age_limit = 18 + elif over_18 is False: + age_limit = 0 + else: + age_limit = None + + return { + '_type': 'url_transparent', + 'url': video_url, + 'title': data.get('title'), + 'thumbnail': url_or_none(data.get('thumbnail')), + 'timestamp': float_or_none(data.get('created_utc')), + 'uploader': data.get('author'), + 'like_count': int_or_none(data.get('ups')), + 'dislike_count': int_or_none(data.get('downs')), + 
'comment_count': int_or_none(data.get('num_comments')), + 'age_limit': age_limit, + } diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py new file mode 100644 index 000000000..10311a81a --- /dev/null +++ b/youtube_dl/extractor/redtube.py @@ -0,0 +1,115 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + str_to_int, + unified_strdate, + url_or_none, +) + + +class RedTubeIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.redtube.com/66418', + 'md5': 'fc08071233725f26b8f014dba9590005', + 'info_dict': { + 'id': '66418', + 'ext': 'mp4', + 'title': 'Sucked on a toilet', + 'upload_date': '20110811', + 'duration': 596, + 'view_count': int, + 'age_limit': 18, + } + }, { + 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)', + webpage) + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.redtube.com/%s' % video_id, video_id) + + if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']): + raise ExtractorError('Video %s has been removed' % video_id, expected=True) + + title = self._html_search_regex( + (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', + r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',), + webpage, 'title', group='title', + default=None) or self._og_search_title(webpage) + + formats = [] + sources = self._parse_json( + self._search_regex( + r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'), + video_id, fatal=False) + if sources and isinstance(sources, dict): + for format_id, format_url in sources.items(): + if format_url: + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': int_or_none(format_id), + }) + medias = self._parse_json( + self._search_regex( + r'mediaDefinition\s*:\s*(\[.+?\])', webpage, + 'media definitions', default='{}'), + video_id, fatal=False) + if medias and isinstance(medias, list): + for media in medias: + format_url = url_or_none(media.get('videoUrl')) + if not format_url: + continue + format_id = media.get('quality') + formats.append({ + 'url': format_url, + 'format_id': format_id, + 'height': int_or_none(format_id), + }) + if not formats: + video_url = self._html_search_regex( + r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') + formats.append({'url': video_url}) + self._sort_formats(formats) + + thumbnail = self._og_search_thumbnail(webpage) + upload_date = unified_strdate(self._search_regex( + r'<span[^>]+>ADDED ([^<]+)<', + webpage, 'upload date', fatal=False)) + duration = int_or_none(self._og_search_property( + 'video:duration', webpage, default=None) or self._search_regex( + r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) + view_count = str_to_int(self._search_regex( + (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', + r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)'), + webpage, 'view count', fatal=False)) + + # No self-labeling, but they describe themselves as + # "Home of Videos Porno" + age_limit = 18 + + return { + 'id': video_id, + 'ext': 'mp4', + 'title': title, + 'thumbnail': thumbnail, + 'upload_date': upload_date, 
+ 'duration': duration, + 'view_count': view_count, + 'age_limit': age_limit, + 'formats': formats, + } diff --git a/youtube_dl/extractor/regiotv.py b/youtube_dl/extractor/regiotv.py new file mode 100644 index 000000000..e250a52f0 --- /dev/null +++ b/youtube_dl/extractor/regiotv.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( + sanitized_Request, + xpath_text, + xpath_with_ns, +) + + +class RegioTVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?regio-tv\.de/video/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.regio-tv.de/video/395808.html', + 'info_dict': { + 'id': '395808', + 'ext': 'mp4', + 'title': 'Wir in Ludwigsburg', + 'description': 'Mit unseren zuckersüßen Adventskindern, außerdem besuchen wir die Abendsterne!', + } + }, { + 'url': 'http://www.regio-tv.de/video/395808', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + key = self._search_regex( + r'key\s*:\s*(["\'])(?P<key>.+?)\1', webpage, 'key', group='key') + title = self._og_search_title(webpage) + + SOAP_TEMPLATE = '<?xml version="1.0" encoding="utf-8"?><soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><{0} xmlns="http://v.telvi.de/"><key xsi:type="xsd:string">{1}</key></{0}></soap:Body></soap:Envelope>' + + request = sanitized_Request( + 'http://v.telvi.de/', + SOAP_TEMPLATE.format('GetHTML5VideoData', key).encode('utf-8')) + video_data = self._download_xml(request, video_id, 'Downloading video XML') + + NS_MAP = { + 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', + 'soap': 'http://schemas.xmlsoap.org/soap/envelope/', + } + + video_url = xpath_text( + video_data, xpath_with_ns('.//video', NS_MAP), 'video url', fatal=True) + thumbnail = xpath_text( + video_data, xpath_with_ns('.//image', NS_MAP), 'thumbnail') + description = self._og_search_description( + webpage) or self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/rentv.py b/youtube_dl/extractor/rentv.py new file mode 100644 index 000000000..7c8909d95 --- /dev/null +++ b/youtube_dl/extractor/rentv.py @@ -0,0 +1,106 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + determine_ext, + int_or_none, + url_or_none, +) + + +class RENTVIE(InfoExtractor): + _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://ren.tv/video/epizod/118577', + 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', + 'info_dict': { + 'id': '118577', + 'ext': 'mp4', + 'title': 'Документальный спецпроект: "Промывка мозгов. 
Технологии XXI века"', + 'timestamp': 1472230800, + 'upload_date': '20160826', + } + }, { + 'url': 'http://ren.tv/player/118577', + 'only_matching': True, + }, { + 'url': 'rentv:118577', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) + config = self._parse_json(self._search_regex( + r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id) + title = config['title'] + formats = [] + for video in config['src']: + src = url_or_none(video.get('src')) + if not src: + continue + ext = determine_ext(src) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) + self._sort_formats(formats) + return { + 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'duration': int_or_none(config.get('duration')), + 'timestamp': int_or_none(config.get('date')), + 'formats': formats, + } + + +class RENTVArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v', + 'md5': 'ebd63c4680b167693745ab91343df1d6', + 'info_dict': { + 'id': '136472', + 'ext': 'mp4', + 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла', + 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.', + } + }, { + # TODO: invalid m3u8 + 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', + 'info_dict': { + 'id': 'playlist', + 'ext': 'mp4', + 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. 
ВИДЕО | РЕН ТВ', + 'uploader': 'ren.tv', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + }, + 'skip': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + + entries = [] + for config_profile in drupal_settings.get('ren_jwplayer', {}).values(): + media_id = config_profile.get('mediaid') + if not media_id: + continue + media_id = compat_str(media_id) + entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id)) + return self.playlist_result(entries, display_id) diff --git a/youtube_dl/extractor/restudy.py b/youtube_dl/extractor/restudy.py new file mode 100644 index 000000000..d47fb45ca --- /dev/null +++ b/youtube_dl/extractor/restudy.py @@ -0,0 +1,44 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RestudyIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.restudy.dk/video/play/id/1637', + 'info_dict': { + 'id': '1637', + 'ext': 'flv', + 'title': 'Leiden-frosteffekt', + 'description': 'Denne video er et eksperiment med flydende kvælstof.', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + }, { + 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + title = self._og_search_title(webpage).strip() + description = self._og_search_description(webpage).strip() + + formats = self._extract_smil_formats( + 'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_%s.xml' % video_id, + video_id) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'formats': formats, + } diff --git a/youtube_dl/extractor/reuters.py b/youtube_dl/extractor/reuters.py new file mode 100644 index 000000000..9dc482d21 --- /dev/null +++ b/youtube_dl/extractor/reuters.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + int_or_none, + unescapeHTML, +) + + +class ReutersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', + 'md5': '8015113643a0b12838f160b0b81cc2ee', + 'info_dict': { + 'id': '368575562', + 'ext': 'mp4', + 'title': 'San Francisco police chief resigns', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id) + video_data = js_to_json(self._search_regex( + r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', + webpage, 'video data')) + + def get_json_value(key, fatal=False): + return self._search_regex(r'"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal) + + title = unescapeHTML(get_json_value('title', fatal=True)) + mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() + + mas_data = self._download_json( + 'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid), + video_id, 
transform_source=js_to_json) + formats = [] + for f in mas_data: + f_url = f.get('url') + if not f_url: + continue + method = f.get('method') + if method == 'hls': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + container = f.get('container') + ext = '3gp' if method == 'mobile' else container + formats.append({ + 'format_id': ext, + 'url': f_url, + 'ext': ext, + 'container': container if method != 'mobile' else None, + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'thumbnail': get_json_value('thumb'), + 'duration': int_or_none(get_json_value('seconds')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py new file mode 100644 index 000000000..4cb99c244 --- /dev/null +++ b/youtube_dl/extractor/reverbnation.py @@ -0,0 +1,53 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + qualities, + str_or_none, +) + + +class ReverbNationIE(InfoExtractor): + _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' + _TESTS = [{ + 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', + 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', + 'info_dict': { + 'id': '16965047', + 'ext': 'mp3', + 'title': 'MONA LISA', + 'uploader': 'ALKILADOS', + 'uploader_id': '216429', + 'thumbnail': r're:^https?://.*\.jpg', + }, + }] + + def _real_extract(self, url): + song_id = self._match_id(url) + + api_res = self._download_json( + 'https://api.reverbnation.com/song/%s' % song_id, + song_id, + note='Downloading information of song %s' % song_id + ) + + THUMBNAILS = ('thumbnail', 'image') + quality = qualities(THUMBNAILS) + thumbnails = [] + for thumb_key in THUMBNAILS: + if api_res.get(thumb_key): + thumbnails.append({ + 'url': api_res[thumb_key], + 'preference': quality(thumb_key) + }) + + return { + 'id': song_id, + 'title': api_res['name'], + 'url': api_res['url'], + 'uploader': api_res.get('artist', {}).get('name'), + 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), + 'thumbnails': thumbnails, + 'ext': 'mp3', + 'vcodec': 'none', + } diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py new file mode 100644 index 000000000..833d8a2f0 --- /dev/null +++ b/youtube_dl/extractor/revision3.py @@ -0,0 +1,170 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + parse_iso8601, + unescapeHTML, + qualities, +) + + +class Revision3EmbedIE(InfoExtractor): + IE_NAME = 'revision3:embed' + _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)' + _TEST = { + 'url': 'http://api.seekernetwork.com/player/embed?videoId=67558', + 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', + 'info_dict': { + 'id': '67558', + 'ext': 'mp4', + 'title': 'The Pros & Cons Of Zoos', + 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', + 'uploader_id': 'dnews', + 'uploader': 'DNews', + } + } + _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('playlist_id') + playlist_type = mobj.group('playlist_type') or 'video_id' + 
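+ # getPlaylist.json takes the id under a key named by playlist_type
+ # ('video_id' by default); the first item of the response is the
+ # requested media entry.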
video_data = self._download_json( + 'http://revision3.com/api/getPlaylist.json', playlist_id, query={ + 'api_key': self._API_KEY, + 'codecs': 'h264,vp8,theora', + playlist_type: playlist_id, + })['items'][0] + + formats = [] + for vcodec, media in video_data['media'].items(): + for quality_id, quality in media.items(): + if quality_id == 'hls': + formats.extend(self._extract_m3u8_formats( + quality['url'], playlist_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': quality['url'], + 'format_id': '%s-%s' % (vcodec, quality_id), + 'tbr': int_or_none(quality.get('bitrate')), + 'vcodec': vcodec, + }) + self._sort_formats(formats) + + return { + 'id': playlist_id, + 'title': unescapeHTML(video_data['title']), + 'description': unescapeHTML(video_data.get('summary')), + 'uploader': video_data.get('show', {}).get('name'), + 'uploader_id': video_data.get('show', {}).get('slug'), + 'duration': int_or_none(video_data.get('duration')), + 'formats': formats, + } + + +class Revision3IE(InfoExtractor): + IE_NAME = 'revision' + _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)' + _TESTS = [{ + 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016', + 'md5': 'd94a72d85d0a829766de4deb8daaf7df', + 'info_dict': { + 'id': '71089', + 'display_id': 'technobuffalo/5-google-predictions-for-2016', + 'ext': 'webm', + 'title': '5 Google Predictions for 2016', + 'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.', + 'upload_date': '20151228', + 'timestamp': 1451325600, + 'duration': 187, + 'uploader': 'TechnoBuffalo', + 'uploader_id': 'technobuffalo', + } + }, { + # Show + 'url': 'http://revision3.com/variant', + 'only_matching': True, + }, { + # Tag + 'url': 'http://revision3.com/vr', + 'only_matching': True, + }] + _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s' + + def _real_extract(self, url): + domain, display_id = re.match(self._VALID_URL, url).groups() + site = domain.split('.')[0] + page_info = self._download_json( + self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id) + + page_data = page_info['data'] + page_type = page_data['type'] + if page_type in ('episode', 'embed'): + show_data = page_data['show']['data'] + page_id = compat_str(page_data['id']) + video_id = compat_str(page_data['video']['data']['id']) + + preference = qualities(['mini', 'small', 'medium', 'large']) + thumbnails = [{ + 'url': image_url, + 'id': image_id, + 'preference': preference(image_id) + } for image_id, image_url in page_data.get('images', {}).items()] + + info = { + 'id': page_id, + 'display_id': display_id, + 'title': unescapeHTML(page_data['name']), + 'description': unescapeHTML(page_data.get('summary')), + 'timestamp': parse_iso8601(page_data.get('publishTime'), ' '), + 'author': page_data.get('author'), + 'uploader': show_data.get('name'), + 'uploader_id': show_data.get('slug'), + 'thumbnails': thumbnails, + 'extractor_key': site, + } + + if page_type == 'embed': + info.update({ + '_type': 'url_transparent', + 'url': page_data['video']['data']['embed'], + }) + return info + + info.update({ + '_type': 'url_transparent', + 'url': 'revision3:%s' % video_id, + }) + return info + else: + list_data = page_info[page_type]['data'] + episodes_data = page_info['episodes']['data'] + num_episodes = page_info['meta']['totalEpisodes'] + processed_episodes = 0 + entries = [] + page_num = 1 + while True: + entries.extend([{ + 
'_type': 'url', + 'url': 'http://%s%s' % (domain, episode['path']), + 'id': compat_str(episode['id']), + 'ie_key': 'Revision3', + 'extractor_key': site, + } for episode in episodes_data]) + processed_episodes += len(episodes_data) + if processed_episodes == num_episodes: + break + page_num += 1 + episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % ( + domain, display_id + '/' + compat_str(page_num), domain), + display_id)['episodes']['data'] + + return self.playlist_result( + entries, compat_str(list_data['id']), + list_data.get('name'), list_data.get('summary')) diff --git a/youtube_dl/extractor/rice.py b/youtube_dl/extractor/rice.py new file mode 100644 index 000000000..f855719ac --- /dev/null +++ b/youtube_dl/extractor/rice.py @@ -0,0 +1,116 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_parse_qs +from ..utils import ( + xpath_text, + xpath_element, + int_or_none, + parse_iso8601, + ExtractorError, +) + + +class RICEIE(InfoExtractor): + _VALID_URL = r'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)' + _TEST = { + 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw', + 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a', + 'info_dict': { + 'id': 'YEWIvbhb40aqdjMD1ALSqw', + 'ext': 'mp4', + 'title': 'Active Learning in Archeology', + 'upload_date': '20140616', + 'timestamp': 1402926346, + } + } + _NS = 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config' + + def _real_extract(self, url): + qs = compat_parse_qs(re.match(self._VALID_URL, url).group('query')) + if not qs.get('PortalID') or not qs.get('DestinationID') or not qs.get('ContentID'): + raise ExtractorError('Invalid URL', expected=True) + + portal_id = qs['PortalID'][0] + playlist_id = qs['DestinationID'][0] + content_id = qs['ContentID'][0] + + content_data = self._download_xml('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id, query={ + 'portalId': portal_id, + 'playlistId': playlist_id, + 'contentId': content_id + }) + metadata = xpath_element(content_data, './/metaData', fatal=True) + title = xpath_text(metadata, 'primaryTitle', fatal=True) + encodings = xpath_element(content_data, './/encodings', fatal=True) + player_data = self._download_xml('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id, query={ + 'temporaryLinkId': xpath_text(encodings, 'temporaryLinkId', fatal=True), + 'contentId': content_id, + }) + + common_fmt = {} + dimensions = xpath_text(encodings, 'dimensions') + if dimensions: + wh = dimensions.split('x') + if len(wh) == 2: + common_fmt.update({ + 'width': int_or_none(wh[0]), + 'height': int_or_none(wh[1]), + }) + + formats = [] + rtsp_path = xpath_text(player_data, self._xpath_ns('RtspPath', self._NS)) + if rtsp_path: + fmt = { + 'url': rtsp_path, + 'format_id': 'rtsp', + } + fmt.update(common_fmt) + formats.append(fmt) + for source in player_data.findall(self._xpath_ns('.//Source', self._NS)): + video_url = xpath_text(source, self._xpath_ns('File', self._NS)) + if not video_url: + continue + if '.m3u8' in video_url: + formats.extend(self._extract_m3u8_formats(video_url, content_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + else: + fmt = { + 'url': video_url, + 'format_id': video_url.split(':')[0], + } + fmt.update(common_fmt) + rtmp = 
re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url) + if rtmp: + fmt.update({ + 'url': rtmp.group('url'), + 'play_path': rtmp.group('playpath'), + 'app': rtmp.group('app'), + 'ext': 'flv', + }) + formats.append(fmt) + self._sort_formats(formats) + + thumbnails = [] + for content_asset in content_data.findall('.//contentAssets'): + asset_type = xpath_text(content_asset, 'type') + if asset_type == 'image': + image_url = xpath_text(content_asset, 'httpPath') + if not image_url: + continue + thumbnails.append({ + 'id': xpath_text(content_asset, 'ID'), + 'url': image_url, + }) + + return { + 'id': content_id, + 'title': title, + 'description': xpath_text(metadata, 'abstract'), + 'duration': int_or_none(xpath_text(metadata, 'duration')), + 'timestamp': parse_iso8601(xpath_text(metadata, 'dateUpdated')), + 'thumbnails': thumbnails, + 'formats': formats, + } diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dl/extractor/rmcdecouverte.py new file mode 100644 index 000000000..c3623edcc --- /dev/null +++ b/youtube_dl/extractor/rmcdecouverte.py @@ -0,0 +1,55 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .brightcove import BrightcoveLegacyIE +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) +from ..utils import smuggle_url + + +class RMCDecouverteIE(InfoExtractor): + _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:(?:[^/]+/)*program_(?P<id>\d+)|(?P<live_id>mediaplayer-direct))' + + _TESTS = [{ + 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', + 'info_dict': { + 'id': '5983675500001', + 'ext': 'mp4', + 'title': 'CORVETTE', + 'description': 'md5:c1e8295521e45ffebf635d6a7658f506', + 'uploader_id': '1969646226001', + 'upload_date': '20181226', + 'timestamp': 1545861635, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'only available for a week', + }, { + # live, geo restricted, bypassable + 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/', + 'only_matching': True, + }] + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('id') or mobj.group('live_id') + webpage = self._download_webpage(url, display_id) + brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) + if brightcove_legacy_url: + brightcove_id = compat_parse_qs(compat_urlparse.urlparse( + brightcove_legacy_url).query)['@videoPlayer'][0] + else: + brightcove_id = self._search_regex( + r'data-video-id=["\'](\d+)', webpage, 'brightcove id') + return self.url_result( + smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, + {'geo_countries': ['FR']}), + 'BrightcoveNew', brightcove_id) diff --git a/youtube_dl/extractor/ro220.py b/youtube_dl/extractor/ro220.py new file mode 100644 index 000000000..69934ef2b --- /dev/null +++ b/youtube_dl/extractor/ro220.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote + + +class Ro220IE(InfoExtractor): + IE_NAME = '220.ro' + _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/', + 'md5': '03af18b73a07b4088753930db7a34add', + 'info_dict': { + 'id': 'LYV6doKo7f', + 'ext': 'mp4', + 'title': 'Luati-le Banii sez 4 
ep 1', + 'description': r're:^Iata-ne reveniti dupa o binemeritata vacanta\. +Va astept si pe Facebook cu pareri si comentarii.$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + url = compat_urllib_parse_unquote(self._search_regex( + r'(?s)clip\s*:\s*{.*?url\s*:\s*\'([^\']+)\'', webpage, 'url')) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + formats = [{ + 'format_id': 'sd', + 'url': url, + 'ext': 'mp4', + }] + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dl/extractor/rockstargames.py new file mode 100644 index 000000000..cd6904bc9 --- /dev/null +++ b/youtube_dl/extractor/rockstargames.py @@ -0,0 +1,69 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_iso8601, +) + + +class RockstarGamesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.rockstargames.com/videos/video/11544/', + 'md5': '03b5caa6e357a4bd50e3143fc03e5733', + 'info_dict': { + 'id': '11544', + 'ext': 'mp4', + 'title': 'Further Adventures in Finance and Felony Trailer', + 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', + 'thumbnail': r're:^https?://.*\.jpg$', + 'timestamp': 1464876000, + 'upload_date': '20160602', + } + }, { + 'url': 'http://www.rockstargames.com/videos#/?video=48', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._download_json( + 'https://www.rockstargames.com/videoplayer/videos/get-video.json', + video_id, query={ + 'id': video_id, + 'locale': 'en_us', + })['video'] + + title = video['title'] + + formats = [] + for video in video['files_processed']['video/mp4']: + if not video.get('src'): + continue + resolution = video.get('resolution') + height = int_or_none(self._search_regex( + r'^(\d+)[pP]$', resolution or '', 'height', default=None)) + formats.append({ + 'url': self._proto_relative_url(video['src']), + 'format_id': resolution, + 'height': height, + }) + + if not formats: + youtube_id = video.get('youtube_id') + if youtube_id: + return self.url_result(youtube_id, 'Youtube') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': video.get('description'), + 'thumbnail': self._proto_relative_url(video.get('screencap')), + 'timestamp': parse_iso8601(video.get('created')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py new file mode 100644 index 000000000..857434540 --- /dev/null +++ b/youtube_dl/extractor/roosterteeth.py @@ -0,0 +1,148 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + strip_or_none, + unescapeHTML, + urlencode_postdata, +) + + +class RoosterTeethIE(InfoExtractor): + _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)' + _LOGIN_URL = 'https://roosterteeth.com/login' + _NETRC_MACHINE = 'roosterteeth' + _TESTS = [{ + 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'md5': 
'e2bd7764732d785ef797700a2489f212', + 'info_dict': { + 'id': '26576', + 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', + 'ext': 'mp4', + 'title': 'Million Dollars, But...: Million Dollars, But... The Game Announcement', + 'description': 'md5:0cc3b21986d54ed815f5faeccd9a9ca5', + 'thumbnail': r're:^https?://.*\.png$', + 'series': 'Million Dollars, But...', + 'episode': 'Million Dollars, But... The Game Announcement', + 'comment_count': int, + }, + }, { + 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', + 'only_matching': True, + }, { + 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts', + 'only_matching': True, + }, { + 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow', + 'only_matching': True, + }, { + 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better', + 'only_matching': True, + }, { + # only available for FIRST members + 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one', + 'only_matching': True, + }] + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, + note='Downloading login page', + errnote='Unable to download login page') + + login_form = self._hidden_inputs(login_page) + + login_form.update({ + 'username': username, + 'password': password, + }) + + login_request = self._download_webpage( + self._LOGIN_URL, None, + note='Logging in', + data=urlencode_postdata(login_form), + headers={ + 'Referer': self._LOGIN_URL, + }) + + if not any(re.search(p, login_request) for p in ( + r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"', + r'>Sign Out<')): + error = self._html_search_regex( + r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>', + login_request, 'alert', default=None, group='error') + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + def _real_initialize(self): + self._login() + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + episode = strip_or_none(unescapeHTML(self._search_regex( + (r'videoTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<title>(?P<title>[^<]+)'), webpage, 'title', + default=None, group='title'))) + + title = strip_or_none(self._og_search_title( + webpage, default=None)) or episode + + m3u8_url = self._search_regex( + r'file\s*:\s*(["\'])(?P<url>http.+?\.m3u8.*?)\1', + webpage, 'm3u8 url', default=None, group='url') +
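+ # A missing manifest URL usually means the episode is gated: a
+ # "non-sponsor" element marks FIRST-member-only content and a
+ # "golive-gate" element marks episodes that have not premiered yet.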
+ if not m3u8_url: + if re.search(r'<div[^>]+class=["\']non-sponsor', webpage): + self.raise_login_required( + '%s is only available for FIRST members' % display_id) + + if re.search(r'<div[^>]+class=["\']golive-gate', webpage): + self.raise_login_required('%s is not available yet' % display_id) + + raise ExtractorError('Unable to extract m3u8 URL') + + formats = self._extract_m3u8_formats( + m3u8_url, display_id, ext='mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + description = strip_or_none(self._og_search_description(webpage)) + thumbnail = self._proto_relative_url(self._og_search_thumbnail(webpage)) + + series = self._search_regex( + (r'<h2>More ([^<]+)</h2>', r'<a[^>]+>See All ([^<]+) Videos<'), + webpage, 'series', fatal=False) + + comment_count = int_or_none(self._search_regex( + r'>Comments \((\d+)\)<', webpage, + 'comment count', fatal=False)) +
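+ # The numeric video id lives either in the player's containerId
+ # JavaScript variable or in the DOM id of the episode container.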
+ video_id = self._search_regex( + (r'containerId\s*=\s*["\']episode-(\d+)\1', + r'<div[^<]+id=["\']episode-(\d+)'), webpage, + 'video id', default=display_id) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'series': series, + 'episode': episode, + 'comment_count': comment_count, + 'formats': formats, + } diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py new file mode 100644 --- /dev/null +++ b/youtube_dl/extractor/rottentomatoes.py @@ -0,0 +1,32 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from .internetvideoarchive import InternetVideoArchiveIE + + +class RottenTomatoesIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + + _TEST = { + 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', + 'info_dict': { + 'id': '11028566', + 'ext': 'mp4', + 'title': 'Toy Story 3', + 'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + iva_id = self._search_regex(r'publishedid=(\d+)', webpage, 'internet video archive id') + + return { + '_type': 'url_transparent', + 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?domain=www.videodetective.com&customerid=69249&playerid=641&publishedid=' + iva_id, + 'ie_key': InternetVideoArchiveIE.ie_key(), + 'id': video_id, + 'title': self._og_search_title(webpage), + } diff --git a/youtube_dl/extractor/roxwel.py b/youtube_dl/extractor/roxwel.py new file mode 100644 index 000000000..65284643b --- /dev/null +++ b/youtube_dl/extractor/roxwel.py @@ -0,0 +1,53 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import unified_strdate, determine_ext + + +class RoxwelIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' + + _TEST = { + 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', + 'info_dict': { + 'id': 'passionpittakeawalklive', + 'ext': 'flv', + 'title': 'Take A Walk (live)', + 'uploader': 'Passion Pit', + 'uploader_id': 'passionpit', + 'upload_date': '20120928', + 'description': 'Passion Pit performs "Take A Walk\" live at The Backyard in Austin, Texas. 
', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + filename = mobj.group('filename') + info_url = 'http://www.roxwel.com/api/videos/%s' % filename + info = self._download_json(info_url, filename) + + rtmp_rates = sorted([int(r.replace('flv_', '')) for r in info['media_rates'] if r.startswith('flv_')]) + best_rate = rtmp_rates[-1] + url_page_url = 'http://roxwel.com/pl_one_time.php?filename=%s&quality=%s' % (filename, best_rate) + rtmp_url = self._download_webpage(url_page_url, filename, 'Downloading video url') + ext = determine_ext(rtmp_url) + if ext == 'f4v': + rtmp_url = rtmp_url.replace(filename, 'mp4:%s' % filename) + + return { + 'id': filename, + 'title': info['title'], + 'url': rtmp_url, + 'ext': 'flv', + 'description': info['description'], + 'thumbnail': info.get('player_image_url') or info.get('image_url_large'), + 'uploader': info['artist'], + 'uploader_id': info['artistname'], + 'upload_date': unified_strdate(info['dbdate']), + }
diff --git a/youtube_dl/extractor/rozhlas.py b/youtube_dl/extractor/rozhlas.py new file mode 100644 index 000000000..fccf69401 --- /dev/null +++ b/youtube_dl/extractor/rozhlas.py @@ -0,0 +1,50 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + remove_start, +) + + +class RozhlasIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://prehravac.rozhlas.cz/audio/3421320', + 'md5': '504c902dbc9e9a1fd50326eccf02a7e2', + 'info_dict': { + 'id': '3421320', + 'ext': 'mp3', + 'title': 'Echo Pavla Klusáka (30.06.2015 21:00)', + 'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let' + } + }, { + 'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed', + 'only_matching': True, + }] + + def _real_extract(self, url): + audio_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://prehravac.rozhlas.cz/audio/%s' % audio_id, audio_id) + + title = self._html_search_regex( + r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', + webpage, 'title', default=None) or remove_start( + self._og_search_title(webpage), 'Radio Wave - ') + description = self._html_search_regex( + r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track', + webpage, 'description', fatal=False, group='url') + duration = int_or_none(self._search_regex( + r'data-duration=["\'](\d+)', webpage, 'duration', default=None)) + + return { + 'id': audio_id, + 'url': 'http://media.rozhlas.cz/_audio/%s.mp3' % audio_id, + 'title': title, + 'description': description, + 'duration': duration, + 'vcodec': 'none', + }
diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py new file mode 100644 index 000000000..3b0f3080b --- /dev/null +++ b/youtube_dl/extractor/rtbf.py @@ -0,0 +1,161 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + float_or_none, + int_or_none, + strip_or_none, +) + + +class RTBFIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?://(?:www\.)?rtbf\.be/ + (?: + video/[^?]+\?.*\bid=| + ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| + auvio/[^/]+\?.*\b(?P<live>l)?id= + )(?P<id>\d+)''' + _TESTS = [{ + 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', + 'md5': '8c876a1cceeb6cf31b476461ade72384', + 'info_dict': { + 'id': '1921274', + 'ext': 'mp4', + 'title': 'Les Diables au coeur (épisode 2)', + 'description': '(du 25/04/2014)', + 'duration': 3099.54, + 'upload_date': '20140425', + 'timestamp': 1398456300, + } + }, { + # geo restricted + 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442', + 'only_matching': True, + }, { + 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858', + 'only_matching': True, + }, { + 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', + 'only_matching': True, + }, { + # Live + 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775', + 'only_matching': True, + }, { + # Audio + 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811', + 'only_matching': True, + }, { + # With Subtitle + 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588', + 'only_matching': True, + }] + _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' + _PROVIDERS = { + 'YOUTUBE': 'Youtube', + 'DAILYMOTION': 'Dailymotion', + 'VIMEO': 'Vimeo', + } + _QUALITIES = [ + ('mobile', 'SD'), + ('web', 'MD'), + ('high', 'HD'), + ] + + def _real_extract(self, url): + live, media_id = re.match(self._VALID_URL, url).groups() + embed_page = self._download_webpage( + 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'), + media_id, query={'id': media_id}) + data = self._parse_json(self._html_search_regex( + r'data-media="([^"]+)"', embed_page, 'media data'), media_id) + + error = data.get('error') + if error: + raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) + + provider = data.get('provider') + if provider in self._PROVIDERS: + return self.url_result(data['url'], self._PROVIDERS[provider]) + + title = data['title'] + is_live = data.get('isLive') + if is_live: + title = self._live_title(title) + height_re = r'-(\d+)p\.'
+ formats = [] + + m3u8_url = data.get('urlHlsAes128') or data.get('urlHls') + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)) + + fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x + http_url = data.get('url') + if formats and http_url and re.search(height_re, http_url): + http_url = fix_url(http_url) + for m3u8_f in formats[:]: + height = m3u8_f.get('height') + if not height: + continue + f = m3u8_f.copy() + del f['protocol'] + f.update({ + 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'), + 'url': re.sub(height_re, '-%dp.' % height, http_url), + }) + formats.append(f) + else: + sources = data.get('sources') or {} + for key, format_id in self._QUALITIES: + format_url = sources.get(key) + if not format_url: + continue + height = int_or_none(self._search_regex( + height_re, format_url, 'height', default=None)) + formats.append({ + 'format_id': format_id, + 'url': fix_url(format_url), + 'height': height, + }) + + mpd_url = data.get('urlDash') + if not data.get('drm') and mpd_url: + formats.extend(self._extract_mpd_formats( + mpd_url, media_id, mpd_id='dash', fatal=False)) + + audio_url = data.get('urlAudio') + if audio_url: + formats.append({ + 'format_id': 'audio', + 'url': audio_url, + 'vcodec': 'none', + }) + self._sort_formats(formats) + + subtitles = {} + for track in (data.get('tracks') or {}).values(): + sub_url = track.get('url') + if not sub_url: + continue + subtitles.setdefault(track.get('lang') or 'fr', []).append({ + 'url': sub_url, + }) + + return { + 'id': media_id, + 'formats': formats, + 'title': title, + 'description': strip_or_none(data.get('description')), + 'thumbnail': data.get('thumbnail'), + 'duration': float_or_none(data.get('realDuration')), + 'timestamp': int_or_none(data.get('liveFrom')), + 'series': data.get('programLabel'), + 'subtitles': subtitles, + 'is_live': is_live, + } diff --git a/youtube_dl/extractor/rte.py b/youtube_dl/extractor/rte.py new file mode 100644 index 000000000..1fbc72915 --- /dev/null +++ b/youtube_dl/extractor/rte.py @@ -0,0 +1,167 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ( + float_or_none, + parse_iso8601, + str_or_none, + try_get, + unescapeHTML, + url_or_none, + ExtractorError, +) + + +class RteBaseIE(InfoExtractor): + def _real_extract(self, url): + item_id = self._match_id(url) + + info_dict = {} + formats = [] + + ENDPOINTS = ( + 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=', + 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=', + ) + + for num, ep_url in enumerate(ENDPOINTS, start=1): + try: + data = self._download_json(ep_url + item_id, item_id) + except ExtractorError as ee: + if num < len(ENDPOINTS) or formats: + continue + if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: + error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False) + if error_info: + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error_info['message']), + expected=True) + raise + + # NB the string values in the JSON are stored using XML escaping(!) 
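+ # (hence the unescapeHTML() calls on the title and description below; a hypothetical
+ # value such as 'News &amp; Weather' would decode to 'News & Weather')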
+ show = try_get(data, lambda x: x['shows'][0], dict) + if not show: + continue + + if not info_dict: + title = unescapeHTML(show['title']) + description = unescapeHTML(show.get('description')) + thumbnail = show.get('thumbnail') + duration = float_or_none(show.get('duration'), 1000) + timestamp = parse_iso8601(show.get('published')) + info_dict = { + 'id': item_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + } + + mg = try_get(show, lambda x: x['media:group'][0], dict) + if not mg: + continue + + if mg.get('url'): + m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) + if m: + m = m.groupdict() + formats.append({ + 'url': m['url'] + '/' + m['app'], + 'app': m['app'], + 'play_path': m['playpath'], + 'player_url': url, + 'ext': 'flv', + 'format_id': 'rtmp', + }) + + if mg.get('hls_server') and mg.get('hls_url'): + formats.extend(self._extract_m3u8_formats( + mg['hls_server'] + mg['hls_url'], item_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) + + if mg.get('hds_server') and mg.get('hds_url'): + formats.extend(self._extract_f4m_formats( + mg['hds_server'] + mg['hds_url'], item_id, + f4m_id='hds', fatal=False)) + + mg_rte_server = str_or_none(mg.get('rte:server')) + mg_url = str_or_none(mg.get('url')) + if mg_rte_server and mg_url: + hds_url = url_or_none(mg_rte_server + mg_url) + if hds_url: + formats.extend(self._extract_f4m_formats( + hds_url, item_id, f4m_id='hds', fatal=False)) + + self._sort_formats(formats) + + info_dict['formats'] = formats + return info_dict + + +class RteIE(RteBaseIE): + IE_NAME = 'rte' + IE_DESC = 'Raidió Teilifís Éireann TV' + _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/', + 'md5': '4a76eb3396d98f697e6e8110563d2604', + 'info_dict': { + 'id': '10478715', + 'ext': 'mp4', + 'title': 'iWitness', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'The spirit of Ireland, one voice and one minute at a time.', + 'duration': 60.046, + 'upload_date': '20151012', + 'timestamp': 1444694160, + }, + } + + +class RteRadioIE(RteBaseIE): + IE_NAME = 'rte:radio' + IE_DESC = 'Raidió Teilifís Éireann radio' + # Radioplayer URLs have two distinct specifier formats, + # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:<entry_point> + # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_<entry_point> + # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated. + # An <id> uniquely defines an individual recording, and is the only part we require.
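+ # e.g. in the test URLs below, '#!rii=16:10507902:2414:27-12-2015:' (old style) and
+ # '#!rii=b16_3250678_8861_06-04-2012_' (new style) both carry the required <id>
+ # (10507902 and 3250678 respectively) as their second field.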
+ _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)' + + _TESTS = [{ + # Old-style player URL; HLS and RTMPE formats + 'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:', + 'md5': 'c79ccb2c195998440065456b69760411', + 'info_dict': { + 'id': '10507902', + 'ext': 'mp4', + 'title': 'Gloria', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'md5:9ce124a7fb41559ec68f06387cabddf0', + 'timestamp': 1451203200, + 'upload_date': '20151227', + 'duration': 7230.0, + }, + }, { + # New-style player URL; RTMPE formats only + 'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_', + 'info_dict': { + 'id': '3250678', + 'ext': 'flv', + 'title': 'The Lyric Concert with Paul Herriott', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': '', + 'timestamp': 1333742400, + 'upload_date': '20120406', + 'duration': 7199.016, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }]
diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dl/extractor/rtl2.py new file mode 100644 index 000000000..70f000ca8 --- /dev/null +++ b/youtube_dl/extractor/rtl2.py @@ -0,0 +1,207 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..aes import aes_cbc_decrypt +from ..compat import ( + compat_b64decode, + compat_ord, + compat_str, +) +from ..utils import ( + bytes_to_intlist, + ExtractorError, + intlist_to_bytes, + int_or_none, + strip_or_none, +) + + +class RTL2IE(InfoExtractor): + IE_NAME = 'rtl2' + _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)' + _TESTS = [{ + 'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0', + 'info_dict': { + 'id': 'folge-203-0', + 'ext': 'f4v', + 'title': 'GRIP sucht den Sommerkönig', + 'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f' + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + }, { + 'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/', + 'info_dict': { + 'id': 'anna-erwischt-alex', + 'ext': 'mp4', + 'title': 'Anna erwischt Alex!', + 'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.'
+ }, + 'params': { + # rtmp download + 'skip_download': True, + }, + 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], + }] + + def _real_extract(self, url): + vico_id, vivi_id, display_id = re.match(self._VALID_URL, url).groups() + if not vico_id: + webpage = self._download_webpage(url, display_id) + + mobj = re.search( + r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"', + webpage) + if mobj: + vico_id = mobj.group('vico_id') + vivi_id = mobj.group('vivi_id') + else: + vico_id = self._html_search_regex( + r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id') + vivi_id = self._html_search_regex( + r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id') + + info = self._download_json( + 'https://service.rtl2.de/api-player-vipo/video.php', + display_id, query={ + 'vico_id': vico_id, + 'vivi_id': vivi_id, + }) + video_info = info['video'] + title = video_info['titel'] + + formats = [] + + rtmp_url = video_info.get('streamurl') + if rtmp_url: + rtmp_url = rtmp_url.replace('\\', '') + stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL') + rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0'] + + formats.append({ + 'format_id': 'rtmp', + 'url': rtmp_url, + 'play_path': stream_url, + 'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf', + 'page_url': url, + 'flash_version': 'LNX 11,2,202,429', + 'rtmp_conn': rtmp_conn, + 'no_resume': True, + 'preference': 1, + }) + + m3u8_url = video_info.get('streamurl_hls') + if m3u8_url: + formats.extend(self._extract_akamai_formats(m3u8_url, display_id)) + + self._sort_formats(formats) + + return { + 'id': display_id, + 'title': title, + 'thumbnail': video_info.get('image'), + 'description': video_info.get('beschreibung'), + 'duration': int_or_none(video_info.get('duration')), + 'formats': formats, + } + + +class RTL2YouBaseIE(InfoExtractor): + _BACKWERK_BASE_URL = 'https://p-you-backwerk.rtl2apps.de/' + + +class RTL2YouIE(RTL2YouBaseIE): + IE_NAME = 'rtl2:you' + _VALID_URL = r'http?://you\.rtl2\.de/(?:video/\d+/|youplayer/index\.html\?.*?\bvid=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://you.rtl2.de/video/3002/15740/MJUNIK%20%E2%80%93%20Home%20of%20YOU/307-hirn-wo-bist-du', + 'info_dict': { + 'id': '15740', + 'ext': 'mp4', + 'title': 'MJUNIK – Home of YOU - #307 Hirn, wo bist du?!', + 'description': 'md5:ddaa95c61b372b12b66e115b2772fe01', + 'age_limit': 12, + }, + }, { + 'url': 'http://you.rtl2.de/youplayer/index.html?vid=15712', + 'only_matching': True, + }] + _AES_KEY = b'\xe9W\xe4.<*\xb8\x1a\xd2\xb6\x92\xf3C\xd3\xefL\x1b\x03*\xbbbH\xc0\x03\xffo\xc2\xf2(\xaa\xaa!'
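+ # Sketch of the decryption done in _real_extract below (inferred from the code itself,
+ # not from any RTL2 documentation): base64-decoding the API's 'streamUrl' field yields
+ # 'base64(ciphertext):base64(iv)'; the ciphertext is AES-CBC-decrypted with the static
+ # key above, and PKCS#7 padding is stripped via the last byte
+ # (stream_url[:-compat_ord(stream_url[-1])]).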
+ _GEO_COUNTRIES = ['DE'] + + def _real_extract(self, url): + video_id = self._match_id(url) + + stream_data = self._download_json( + self._BACKWERK_BASE_URL + 'stream/video/' + video_id, video_id) + + data, iv = compat_b64decode(stream_data['streamUrl']).decode().split(':') + stream_url = intlist_to_bytes(aes_cbc_decrypt( + bytes_to_intlist(compat_b64decode(data)), + bytes_to_intlist(self._AES_KEY), + bytes_to_intlist(compat_b64decode(iv)) + )) + if b'rtl2_you_video_not_found' in stream_url: + raise ExtractorError('video not found', expected=True) + + formats = self._extract_m3u8_formats( + stream_url[:-compat_ord(stream_url[-1])].decode(), + video_id, 'mp4', 'm3u8_native') + self._sort_formats(formats) + + video_data = self._download_json( + self._BACKWERK_BASE_URL + 'video/' + video_id, video_id) + + series = video_data.get('formatTitle') + title = episode = video_data.get('title') or series + if series and series != title: + title = '%s - %s' % (series, title) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': strip_or_none(video_data.get('description')), + 'thumbnail': video_data.get('image'), + 'duration': int_or_none(stream_data.get('duration') or video_data.get('duration'), 1000), + 'series': series, + 'episode': episode, + 'age_limit': int_or_none(video_data.get('minimumAge')), + } + + +class RTL2YouSeriesIE(RTL2YouBaseIE): + IE_NAME = 'rtl2:you:series' + _VALID_URL = r'http?://you\.rtl2\.de/videos/(?P<id>\d+)' + _TEST = { + 'url': 'http://you.rtl2.de/videos/115/dragon-ball', + 'info_dict': { + 'id': '115', + }, + 'playlist_mincount': 5, + } + + def _real_extract(self, url): + series_id = self._match_id(url) + stream_data = self._download_json( + self._BACKWERK_BASE_URL + 'videos', + series_id, query={ + 'formatId': series_id, + 'limit': 1000000000, + }) + + entries = [] + for video in stream_data.get('videos', []): + video_id = compat_str(video['videoId']) + if not video_id: + continue + entries.append(self.url_result( + 'http://you.rtl2.de/video/%s/%s' % (series_id, video_id), + 'RTL2You', video_id)) + return self.playlist_result(entries, series_id)
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dl/extractor/rtlnl.py new file mode 100644 index 000000000..0b5e55d16 --- /dev/null +++ b/youtube_dl/extractor/rtlnl.py @@ -0,0 +1,126 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + parse_duration, +) + + +class RtlNlIE(InfoExtractor): + IE_NAME = 'rtl.nl' + IE_DESC = 'rtl.nl and rtlxl.nl' + _VALID_URL = r'''(?x) + https?://(?:(?:www|static)\.)? + (?: + rtlxl\.nl/[^\#]*\#!/[^/]+/| + rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/) + ) + (?P<id>[0-9a-f-]+)''' + + _TESTS = [{ + 'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416', + 'md5': '473d1946c1fdd050b2c0161a4b13c373', + 'info_dict': { + 'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416', + 'ext': 'mp4', + 'title': 'RTL Nieuws', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'timestamp': 1461951000, + 'upload_date': '20160429', + 'duration': 1167.96, + }, + }, { + # best format available a3t + 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false', + 'md5': 'dea7474214af1271d91ef332fb8be7ea', + 'info_dict': { + 'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed', + 'ext': 'mp4', + 'timestamp': 1424039400, + 'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag', + 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$', + 'upload_date': '20150215', + 'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.', + } + }, { + # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275) + # best format available nettv + 'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false', + 'info_dict': { + 'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a', + 'ext': 'mp4', + 'title': 'RTL Nieuws - Meer beelden van overval juwelier', + 'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$', + 'timestamp': 1437233400, + 'upload_date': '20150718', + 'duration': 30.474, + }, + 'params': { + 'skip_download': True, + }, + }, { + # encrypted m3u8 streams, georestricted + 'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7', + 'only_matching': True, + }, { + 'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0', + 'only_matching': True, + }, { + 'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f', + 'only_matching': True, + }, { + 'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/', + 'only_matching': True, + }, { + 'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl', + 'only_matching': True, + }] + + def _real_extract(self, url): + uuid = self._match_id(url) + info = self._download_json( + 'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=adaptive/' % uuid, + uuid) + + material = info['material'][0] + title = info['abstracts'][0]['name'] + subtitle = material.get('title') + if subtitle: + title += ' - %s' % subtitle + description = material.get('synopsis') + + meta = info.get('meta', {}) + + videopath = material['videopath'] + m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath + + formats = self._extract_m3u8_formats( + m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False) + self._sort_formats(formats) + + thumbnails = [] + + for p in ('poster_base_url', 'thumb_base_url'): + if not meta.get(p): + continue + + thumbnails.append({ + 'url':
self._proto_relative_url(meta[p] + uuid), + 'width': int_or_none(self._search_regex( + r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)), + 'height': int_or_none(self._search_regex( + r'/sz=[0-9]+x([0-9]+)', + meta[p], 'thumbnail height', fatal=False)) + }) + + return { + 'id': uuid, + 'title': title, + 'formats': formats, + 'timestamp': material['original_date'], + 'description': description, + 'duration': parse_duration(material.get('duration')), + 'thumbnails': thumbnails, + }
diff --git a/youtube_dl/extractor/rtp.py b/youtube_dl/extractor/rtp.py new file mode 100644 index 000000000..02986f442 --- /dev/null +++ b/youtube_dl/extractor/rtp.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + js_to_json, +) + + +class RTPIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)/?' + _TESTS = [{ + 'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas', + 'md5': 'e736ce0c665e459ddb818546220b4ef8', + 'info_dict': { + 'id': 'e174042', + 'ext': 'mp3', + 'title': 'Paixões Cruzadas', + 'description': 'As paixões musicais de António Cartaxo e António Macedo', + 'thumbnail': r're:^https?://.*\.jpg', + }, + }, { + 'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + title = self._html_search_meta( + 'twitter:title', webpage, display_name='title', fatal=True) + + config = self._parse_json(self._search_regex( + r'(?s)RTPPlayer\(({.+?})\);', webpage, + 'player config'), video_id, js_to_json) + file_url = config['file'] + ext = determine_ext(file_url) + if ext == 'm3u8': + file_key = config.get('fileKey') + formats = self._extract_m3u8_formats( + file_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=file_key) + if file_key: + formats.append({ + 'url': 'https://cdn-ondemand.rtp.pt' + file_key, + 'preference': 1, + }) + self._sort_formats(formats) + else: + formats = [{ + 'url': file_url, + 'ext': ext, + }] + if config.get('mediaType') == 'audio': + for f in formats: + f['vcodec'] = 'none' + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': self._html_search_meta(['description', 'twitter:description'], webpage), + 'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage), + }
diff --git a/youtube_dl/extractor/rts.py b/youtube_dl/extractor/rts.py new file mode 100644 index 000000000..48f17b828 --- /dev/null +++ b/youtube_dl/extractor/rts.py @@ -0,0 +1,230 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .srgssr import SRGSSRIE +from ..compat import compat_str +from ..utils import ( + int_or_none, + parse_duration, + parse_iso8601, + unescapeHTML, + determine_ext, +) + + +class RTSIE(SRGSSRIE): + IE_DESC = 'RTS.ch' + _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html' + + _TESTS = [ + { + 'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html', + 'md5': 'ff7f8450a90cf58dacb64e29707b4a8e', + 'info_dict': { + 'id': '3449373', + 'display_id': 'les-enfants-terribles', + 'ext': 'mp4', + 'duration': 1488, + 'title': 'Les Enfants Terribles', + 'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.', + 'uploader': 'Divers', + 'upload_date': '19680921', + 'timestamp': -40280400, + 'thumbnail': r're:^https?://.*\.image', + 'view_count': int, + }, + }, + { + 'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html', + 'info_dict': { + 'id': '5624065', + 'title': 'Passe-moi les jumelles', + }, + 'playlist_mincount': 4, + }, + { + 'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html', + 'info_dict': { + 'id': '5745975', + 'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski', + 'ext': 'mp4', + 'duration': 48, + 'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski', + 'description': 'Hockey - Playoff', + 'uploader': 'Hockey', + 'upload_date': '20140403', + 'timestamp': 1396556882, + 'thumbnail': r're:^https?://.*\.image', + 'view_count': int, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'skip': 'Blocked outside Switzerland', + }, + { + 'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html', + 'md5': '1bae984fe7b1f78e94abc74e802ed99f', + 'info_dict': { + 'id': '5745356', + 'display_id': 'londres-cachee-par-un-epais-smog', + 'ext': 'mp4', + 'duration': 33, + 'title': 'Londres cachée par un épais smog', + 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.', + 'uploader': 'L\'actu en vidéo', + 'upload_date': '20140403', + 'timestamp': 1396537322, + 'thumbnail': r're:^https?://.*\.image', + 'view_count': int, + }, + }, + { + 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', + 'md5': 'dd8ef6a22dff163d063e2a52bc8adcae', + 'info_dict': { + 'id': '5706148', + 'display_id': 'urban-hippie-de-damien-krisl-03-04-2014', + 'ext': 'mp3', + 'duration': 123, + 'title': '"Urban Hippie", de Damien Krisl', + 'description': 'Des Hippies super glam.', + 'upload_date': '20140403', + 'timestamp': 1396551600, + }, + }, + { + # article with videos on rhs + 'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html', + 'info_dict': { + 'id': '6693917', + 'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse', + }, + 'playlist_mincount': 5, + }, + { + 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html', + 'only_matching': True, + } + ] + + def _real_extract(self, url): + m = re.match(self._VALID_URL, url) + media_id = m.group('rts_id') or m.group('id') + display_id = m.group('display_id') or media_id + + def download_json(internal_id): + return self._download_json( + 'http://www.rts.ch/a/%s.html?f=json/article' % internal_id, + display_id) + + all_info = download_json(media_id) + + # media_id extracted out of URL is not always a real id + if 'video' not in all_info and 'audio' not in all_info: + entries = [] + + for item in all_info.get('items', []): + item_url = item.get('url') + if not item_url: + continue + entries.append(self.url_result(item_url, 'RTS')) + + if not entries: + page, urlh = self._download_webpage_handle(url, display_id) + if re.match(self._VALID_URL, urlh.geturl()).group('id') != media_id: + return self.url_result(urlh.geturl(), 'RTS') + + # article with videos on rhs + videos = re.findall( + r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"', + page) + if not videos: + videos = re.findall( + r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"', + page) + if videos: + entries =
[self.url_result('srgssr:%s' % video_urn, 'SRGSSR') for video_urn in videos] + + if entries: + return self.playlist_result(entries, media_id, all_info.get('title')) + + internal_id = self._html_search_regex( + r'<(?:video|audio) data-id="([0-9]+)"', page, + 'internal video id') + all_info = download_json(internal_id) + + media_type = 'video' if 'video' in all_info else 'audio' + + # check for errors + self.get_media_data('rts', media_type, media_id) + + info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio'] + + title = info['title'] + + def extract_bitrate(url): + return int_or_none(self._search_regex( + r'-([0-9]+)k\.', url, 'bitrate', default=None)) + + formats = [] + streams = info.get('streams', {}) + for format_id, format_url in streams.items(): + if format_id == 'hds_sd' and 'hds' in streams: + continue + if format_id == 'hls_sd' and 'hls' in streams: + continue + ext = determine_ext(format_url) + if ext in ('m3u8', 'f4m'): + format_url = self._get_tokenized_src(format_url, media_id, format_id) + if ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0', + media_id, f4m_id=format_id, fatal=False)) + else: + formats.extend(self._extract_m3u8_formats( + format_url, media_id, 'mp4', 'm3u8_native', m3u8_id=format_id, fatal=False)) + else: + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'tbr': extract_bitrate(format_url), + }) + + for media in info.get('media', []): + media_url = media.get('url') + if not media_url or re.match(r'https?://', media_url): + continue + rate = media.get('rate') + ext = media.get('ext') or determine_ext(media_url, 'mp4') + format_id = ext + if rate: + format_id += '-%dk' % rate + formats.append({ + 'format_id': format_id, + 'url': 'http://download-video.rts.ch/' + media_url, + 'tbr': rate or extract_bitrate(media_url), + }) + + self._check_formats(formats, media_id) + self._sort_formats(formats) + + duration = info.get('duration') or info.get('cutout') or info.get('cutduration') + if isinstance(duration, compat_str): + duration = parse_duration(duration) + + return { + 'id': media_id, + 'display_id': display_id, + 'formats': formats, + 'title': title, + 'description': info.get('intro'), + 'duration': duration, + 'view_count': int_or_none(info.get('plays')), + 'uploader': info.get('programName'), + 'timestamp': parse_iso8601(info.get('broadcast_date')), + 'thumbnail': unescapeHTML(info.get('preview_image_url')), + } diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py new file mode 100644 index 000000000..ce9db0629 --- /dev/null +++ b/youtube_dl/extractor/rtve.py @@ -0,0 +1,292 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import base64 +import re +import time + +from .common import InfoExtractor +from ..compat import ( + compat_b64decode, + compat_struct_unpack, +) +from ..utils import ( + determine_ext, + ExtractorError, + float_or_none, + remove_end, + remove_start, + sanitized_Request, + std_headers, +) + + +def _decrypt_url(png): + encrypted_data = compat_b64decode(png) + text_index = encrypted_data.find(b'tEXt') + text_chunk = encrypted_data[text_index - 4:] + length = compat_struct_unpack('!I', text_chunk[:4])[0] + # Use bytearray to get integers when iterating in both python 2.x and 3.x + data = bytearray(text_chunk[8:8 + length]) + data = [chr(b) for b in data if b != 0] + hash_index = data.index('#') + alphabet_data = data[:hash_index] + url_data = data[hash_index + 1:] + if url_data[0] == 'H' 
and url_data[3] == '%': + # remove useless HQ%% at the start + url_data = url_data[4:] + + alphabet = [] + e = 0 + d = 0 + for l in alphabet_data: + if d == 0: + alphabet.append(l) + d = e = (e + 1) % 4 + else: + d -= 1 + url = '' + f = 0 + e = 3 + b = 1 + for letter in url_data: + if f == 0: + l = int(letter) * 10 + f = 1 + else: + if e == 0: + l += int(letter) + url += alphabet[l] + e = (b + 3) % 4 + f = 0 + b += 1 + else: + e -= 1 + + return url + + +class RTVEALaCartaIE(InfoExtractor): + IE_NAME = 'rtve.es:alacarta' + IE_DESC = 'RTVE a la carta' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' + + _TESTS = [{ + 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', + 'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43', + 'info_dict': { + 'id': '2491869', + 'ext': 'mp4', + 'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia', + 'duration': 5024.566, + }, + }, { + 'note': 'Live stream', + 'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/', + 'info_dict': { + 'id': '1694255', + 'ext': 'flv', + 'title': 'TODO', + }, + 'skip': 'The f4m manifest can\'t be used yet', + }, { + 'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/', + 'md5': 'e55e162379ad587e9640eda4f7353c0f', + 'info_dict': { + 'id': '4236788', + 'ext': 'mp4', + 'title': 'Servir y proteger - Capítulo 104 ', + 'duration': 3222.0, + }, + 'params': { + 'skip_download': True, # requires ffmpeg + }, + }, { + 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', + 'only_matching': True, + }, { + 'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/', + 'only_matching': True, + }] + + def _real_initialize(self): + user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8') + manager_info = self._download_json( + 'http://www.rtve.es/odin/loki/' + user_agent_b64, + None, 'Fetching manager info') + self._manager = manager_info['manager'] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + info = self._download_json( + 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, + video_id)['page']['items'][0] + if info['state'] == 'DESPU': + raise ExtractorError('The video is no longer available', expected=True) + title = info['title'] + png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id) + png_request = sanitized_Request(png_url) + png_request.add_header('Referer', url) + png = self._download_webpage(png_request, video_id, 'Downloading url information') + video_url = _decrypt_url(png) + ext = determine_ext(video_url) + + formats = [] + if not video_url.endswith('.f4m') and ext != 'm3u8': + if '?' not in video_url: + video_url = video_url.replace('resources/', 'auth/resources/') + video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve') + + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds', fatal=False)) + else: + formats.append({ + 'url': video_url, + }) + self._sort_formats(formats) + + subtitles = None + if info.get('sbtFile') is not None: + subtitles = self.extract_subtitles(video_id, info['sbtFile']) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': info.get('image'), + 'page_url': url, + 'subtitles': subtitles, + 'duration': float_or_none(info.get('duration'), scale=1000), + } + + def _get_subtitles(self, video_id, sub_file): + subs = self._download_json( + sub_file + '.json', video_id, + 'Downloading subtitles info')['page']['items'] + return dict( + (s['lang'], [{'ext': 'vtt', 'url': s['src']}]) + for s in subs) + + +class RTVEInfantilIE(InfoExtractor): + IE_NAME = 'rtve.es:infantil' + IE_DESC = 'RTVE infantil' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/' + + _TESTS = [{ + 'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/', + 'md5': '915319587b33720b8e0357caaa6617e6', + 'info_dict': { + 'id': '3040283', + 'ext': 'mp4', + 'title': 'Maneras de vivir', + 'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG', + 'duration': 357.958, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._download_json( + 'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id, + video_id)['page']['items'][0] + + webpage = self._download_webpage(url, video_id) + vidplayer_id = self._search_regex( + r' id="vidplayer([0-9]+)"', webpage, 'internal video ID') + + png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id + png = self._download_webpage(png_url, video_id, 'Downloading url information') + video_url = _decrypt_url(png) + + return { + 'id': video_id, + 'ext': 'mp4', + 'title': info['title'], + 'url': video_url, + 'thumbnail': info.get('image'), + 'duration': float_or_none(info.get('duration'), scale=1000), + } + + +class RTVELiveIE(InfoExtractor): + IE_NAME = 'rtve.es:live' + IE_DESC = 'RTVE.es live streams' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' + + _TESTS = [{ + 'url': 'http://www.rtve.es/directo/la-1/', + 'info_dict': { + 'id': 'la-1', + 'ext': 'mp4', + 'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$', + }, + 'params': { + 'skip_download': 'live stream', + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + start_time = time.gmtime() + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es') + title = remove_start(title, 'Estoy viendo ') + title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time) + + vidplayer_id = self._search_regex( + (r'playerId=player([0-9]+)', + r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)', + r'data-id=["\'](\d+)'), + webpage, 'internal video ID') + png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id + png = self._download_webpage(png_url, video_id, 'Downloading url information') + m3u8_url = _decrypt_url(png)
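+ # _decrypt_url() above undoes RTVE's obfuscation: the real manifest URL is carried
+ # inside the tEXt chunk of the downloaded PNG (see the helper at the top of this
+ # file); the same trick is shared by the other RTVE extractors in this file.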
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'is_live': True, + } + + +class RTVETelevisionIE(InfoExtractor): + IE_NAME = 'rtve.es:television' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' + + _TEST = { + 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', + 'info_dict': { + 'id': '3069778', + 'ext': 'mp4', + 'title': 'Documentos TV - La revolución del móvil', + 'duration': 3496.948, + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + + alacarta_url = self._search_regex( + r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&', + webpage, 'alacarta url', default=None) + if alacarta_url is None: + raise ExtractorError( + 'The webpage doesn\'t contain any video', expected=True) + + return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())
diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dl/extractor/rtvnh.py new file mode 100644 index 000000000..6a00f7007 --- /dev/null +++ b/youtube_dl/extractor/rtvnh.py @@ -0,0 +1,62 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class RTVNHIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.rtvnh.nl/video/131946', + 'md5': 'cdbec9f44550763c8afc96050fa747dc', + 'info_dict': { + 'id': '131946', + 'ext': 'mp4', + 'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw', + 'thumbnail': r're:^https?:.*\.jpg$' + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + meta = self._parse_json(self._download_webpage( + 'http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id) + + status = meta.get('status') + if status != 200: + raise ExtractorError( + '%s returned error code %d' % (self.IE_NAME, status), expected=True) + + formats = [] + rtmp_formats = self._extract_smil_formats( + 'http://www.rtvnh.nl/video/smil?m=' + video_id, video_id) + formats.extend(rtmp_formats) + + for rtmp_format in rtmp_formats: + rtmp_url = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) + rtsp_format = rtmp_format.copy() + del rtsp_format['play_path'] + del rtsp_format['ext'] + rtsp_format.update({ + 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), + 'url': rtmp_url.replace('rtmp://', 'rtsp://'), + 'protocol': 'rtsp', + }) + formats.append(rtsp_format) + http_base_url = rtmp_url.replace('rtmp://', 'http://') + formats.extend(self._extract_m3u8_formats( + http_base_url + '/playlist.m3u8', video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False)) + formats.extend(self._extract_f4m_formats( + http_base_url + '/manifest.f4m', + video_id, f4m_id='hds', fatal=False)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': meta['title'].strip(), + 'thumbnail': meta.get('image'), + 'formats': formats + }
diff --git a/youtube_dl/extractor/rtvs.py b/youtube_dl/extractor/rtvs.py new file mode 100644 index 000000000..6573b260d --- /dev/null +++ b/youtube_dl/extractor/rtvs.py @@ -0,0 +1,47 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RTVSIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)' + _TESTS =
[{ + # radio archive + 'url': 'http://www.rtvs.sk/radio/archiv/11224/414872', + 'md5': '134d5d6debdeddf8a5d761cbc9edacb8', + 'info_dict': { + 'id': '414872', + 'ext': 'mp3', + 'title': 'Ostrov pokladov 1 časť.mp3' + }, + 'params': { + 'skip_download': True, + } + }, { + # tv archive + 'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118', + 'md5': '85e2c55cf988403b70cac24f5c086dc6', + 'info_dict': { + 'id': '63118', + 'ext': 'mp4', + 'title': 'Amaro Džives - Náš deň', + 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.' + }, + 'params': { + 'skip_download': True, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + playlist_url = self._search_regex( + r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'playlist url', group='url') + + data = self._download_json( + playlist_url, video_id, 'Downloading playlist')[0] + return self._parse_jwplayer_data(data, video_id=video_id)
diff --git a/youtube_dl/extractor/rudo.py b/youtube_dl/extractor/rudo.py new file mode 100644 index 000000000..f036f6757 --- /dev/null +++ b/youtube_dl/extractor/rudo.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + js_to_json, + get_element_by_class, + unified_strdate, +) + + +class RudoIE(InfoExtractor): + _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)' + + _TEST = { + 'url': 'http://rudo.video/vod/oTzw0MGnyG', + 'md5': '2a03a5b32dd90a04c83b6d391cf7b415', + 'info_dict': { + 'id': 'oTzw0MGnyG', + 'ext': 'mp4', + 'title': 'Comentario Tomás Mosciatti', + 'upload_date': '20160617', + }, + } + + @classmethod + def _extract_url(cls, webpage): + mobj = re.search( + r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id, encoding='iso-8859-1') + + jwplayer_data = self._parse_json(self._search_regex( + r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id, + transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s))) + + info_dict = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, m3u8_id='hls', mpd_id='dash') + + info_dict.update({ + 'title': self._og_search_title(webpage), + 'upload_date': unified_strdate(get_element_by_class('date', webpage)), + }) + + return info_dict
diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dl/extractor/ruhd.py new file mode 100644 index 000000000..3c8053a26 --- /dev/null +++ b/youtube_dl/extractor/ruhd.py @@ -0,0 +1,45 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RUHDIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ruhd\.ru/play\.php\?vid=(?P<id>\d+)' + _TEST = { + 'url': 'http://www.ruhd.ru/play.php?vid=207', + 'md5': 'd1a9ec4edf8598e3fbd92bb16072ba83', + 'info_dict': { + 'id': '207', + 'ext': 'divx', + 'title': 'КОТ бааааам', + 'description': 'классный кот)', + 'thumbnail': r're:^http://.*\.jpg$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'<param name="src" value="([^"]+)"', webpage, 'video url') + title = self._html_search_regex( + r'<title>([^<]+)&nbsp;&nbsp; RUHD\.ru - Видео Высокого качества №1 в России!</title>', + webpage, 'title') + description = self._html_search_regex( + r'(?s)
<div id="longdesc">(.+?)<span id="showlink">', + webpage, 'description', fatal=False) + thumbnail = self._html_search_regex( + r'<meta property="og:image" content="([^"]+)"', webpage, 'thumbnail', fatal=False) + if thumbnail: + thumbnail = 'http://www.ruhd.ru' + thumbnail + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + }
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py new file mode 100644 --- /dev/null +++ b/youtube_dl/extractor/rutube.py +# coding: utf-8 +from __future__ import unicode_literals + +import re +import itertools + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_parse_qs, + compat_urllib_parse_urlparse, +) +from ..utils import ( + determine_ext, + bool_or_none, + int_or_none, + try_get, + unified_timestamp, + url_or_none, +) + + +class RutubeBaseIE(InfoExtractor): + def _download_api_info(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/video/%s/' % video_id, + video_id, 'Downloading video JSON', + 'Unable to download video JSON', query=query) + + @staticmethod + def _extract_info(video, video_id=None, require_title=True): + title = video.get('title') or (None if not require_title else video['title']) + + age_limit = video.get('is_adult') + if age_limit is not None: + age_limit = 18 if age_limit is True else 0 + + uploader_id = try_get(video, lambda x: x['author']['id']) + category = try_get(video, lambda x: x['category']['name']) + + return { + 'id': video.get('id') or video_id if video_id else video['id'], + 'title': title, + 'description': video.get('description'), + 'thumbnail': video.get('thumbnail_url'), + 'duration': int_or_none(video.get('duration')), + 'uploader': try_get(video, lambda x: x['author']['name']), + 'uploader_id': compat_str(uploader_id) if uploader_id else None, + 'timestamp': unified_timestamp(video.get('created_ts')), + 'category': [category] if category else None, + 'age_limit': age_limit, + 'view_count': int_or_none(video.get('hits')), + 'comment_count': int_or_none(video.get('comments_count')), + 'is_live': bool_or_none(video.get('is_livestream')), + } + + def _download_and_extract_info(self, video_id, query=None): + return self._extract_info( + self._download_api_info(video_id, query=query), video_id) + + def _download_api_options(self, video_id, query=None): + if not query: + query = {} + query['format'] = 'json' + return self._download_json( + 'http://rutube.ru/api/play/options/%s/' % video_id, + video_id, 'Downloading options JSON', + 'Unable to download options JSON', + headers=self.geo_verification_headers(), query=query) + + def _extract_formats(self, options, video_id): + formats = [] + for format_id, format_url in options['video_balancer'].items(): + ext = determine_ext(format_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + format_url, video_id, f4m_id=format_id, fatal=False)) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + return formats + + def _download_and_extract_formats(self, video_id, query=None): + return self._extract_formats( + self._download_api_options(video_id, query=query), video_id) + + +class RutubeIE(RutubeBaseIE): + IE_NAME = 'rutube' + IE_DESC = 'Rutube videos' + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/(?P<id>[\da-z]{32})' + + _TESTS = [{ + 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', + 'md5': '1d24f180fac7a02f3900712e5a5764d6', + 'info_dict': { + 'id': '3eac3b4561676c17df9132a9a1e62e3e', + 'ext': 'mp4', + 'title': 'Раненный кенгуру забежал в аптеку', + 'description': 'http://www.ntdtv.ru ', + 'duration': 81, + 'uploader': 'NTDRussian', + 'uploader_id': '29790', + 'timestamp': 1381943602, + 'upload_date': '20131016', + 'age_limit': 0, + }, + }, { + 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', + 'only_matching': True, + }, { + 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', + 'only_matching': True, + }, { + 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', + 'only_matching': True, + }, { + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url) + + @staticmethod + def _extract_urls(webpage): + return [mobj.group('url') for mobj in re.finditer( + r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1', + webpage)] + + def _real_extract(self, url): + video_id = self._match_id(url) + info = self._download_and_extract_info(video_id) + info['formats'] = self._download_and_extract_formats(video_id) + return info + + +class RutubeEmbedIE(RutubeBaseIE): + IE_NAME = 'rutube:embed' + IE_DESC = 'Rutube embedded videos' + _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' + + _TESTS = [{ + 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', + 'info_dict': { + 'id': 'a10e53b86e8f349080f718582ce4c661', + 'ext': 'mp4', + 'timestamp': 1387830582, + 'upload_date': '20131223', + 'uploader_id': '297833', + 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', + 'uploader': 'subziro89 ILya', + 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://rutube.ru/play/embed/8083783', + 'only_matching': True, + }, { + # private video + 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ', + 'only_matching': True, + }] + + def _real_extract(self, url): + embed_id = self._match_id(url) + # Query may contain private videos token and should be passed to API + # requests (see #19163) + query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + options = self._download_api_options(embed_id, query) + video_id = options['effective_video'] + formats = self._extract_formats(options, video_id) + info = self._download_and_extract_info(video_id, query) + info.update({ + 'extractor_key': 'Rutube', + 'formats': formats, + }) + return info + + +class RutubePlaylistBaseIE(RutubeBaseIE): + def _next_page_url(self, page_num, playlist_id, *args, **kwargs): + return self._PAGE_TEMPLATE % (playlist_id, page_num) + + def _entries(self, playlist_id, *args, **kwargs): + next_page_url = None + for pagenum in itertools.count(1): + page = self._download_json( + next_page_url or self._next_page_url( + pagenum, playlist_id, *args, **kwargs), + playlist_id, 'Downloading page %s' % pagenum) + + results = page.get('results') + if not results or not isinstance(results, list): + break + + for result in results: + video_url = url_or_none(result.get('video_url')) + if not video_url: + continue + entry = self._extract_info(result, require_title=False) + entry.update({ + '_type': 'url', + 'url': video_url, + 'ie_key': RutubeIE.ie_key(), + }) + yield entry + + next_page_url = page.get('next') + if not next_page_url or not page.get('has_next'): + break + + def _extract_playlist(self, playlist_id, *args, **kwargs): + return self.playlist_result( + self._entries(playlist_id, *args, **kwargs), + playlist_id, kwargs.get('playlist_name')) + + def _real_extract(self, url): + return self._extract_playlist(self._match_id(url)) + + +class RutubeChannelIE(RutubePlaylistBaseIE): + IE_NAME = 'rutube:channel' + IE_DESC = 'Rutube channels' + _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://rutube.ru/tags/video/1800/', + 'info_dict': { + 'id': '1800', + }, + 'playlist_mincount': 68, + }] + + _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + + +class RutubeMovieIE(RutubePlaylistBaseIE): + IE_NAME = 'rutube:movie' + IE_DESC = 'Rutube movies' + _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' + _TESTS = [] + + _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' + _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + + def _real_extract(self, url): + movie_id = self._match_id(url) + movie = self._download_json( + self._MOVIE_TEMPLATE % movie_id, movie_id, + 'Downloading movie JSON') + return self._extract_playlist( + movie_id, playlist_name=movie.get('name')) + + +class RutubePersonIE(RutubePlaylistBaseIE): + IE_NAME = 'rutube:person' + IE_DESC = 'Rutube person videos' + _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://rutube.ru/video/person/313878/', + 'info_dict': { + 'id': '313878', + }, + 'playlist_mincount': 37, + }] + + _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' + + +class RutubePlaylistIE(RutubePlaylistBaseIE): + IE_NAME =
'rutube:playlist' + IE_DESC = 'Rutube playlists' + _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', + 'info_dict': { + 'id': '3097', + }, + 'playlist_count': 27, + }, { + 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', + 'only_matching': True, + }] + + _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' + + @classmethod + def suitable(cls, url): + if not super(RutubePlaylistIE, cls).suitable(url): + return False + params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) + + def _next_page_url(self, page_num, playlist_id, item_kind): + return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) + + def _real_extract(self, url): + qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + playlist_kind = qs['pl_type'][0] + playlist_id = qs['pl_id'][0] + return self._extract_playlist(playlist_id, item_kind=playlist_kind)
diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py new file mode 100644 index 000000000..d2713c19a --- /dev/null +++ b/youtube_dl/extractor/rutv.py @@ -0,0 +1,211 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none +) + + +class RUTVIE(InfoExtractor): + IE_DESC = 'RUTV.RU' + _VALID_URL = r'''(?x) + https?:// + (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/ + (?P<path> + flash\d+v/container\.swf\?id=| + iframe/(?P<type>swf|video|live)/id/| + index/iframe/cast_id/ + ) + (?P<id>\d+) + ''' + + _TESTS = [ + { + 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', + 'info_dict': { + 'id': '774471', + 'ext': 'mp4', + 'title': 'Монологи на все времена', + 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', + 'duration': 2906, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', + 'info_dict': { + 'id': '774016', + 'ext': 'mp4', + 'title': 'Чужой в семье Сталина', + 'description': '', + 'duration': 2539, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', + 'info_dict': { + 'id': '766888', + 'ext': 'mp4', + 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', + 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', + 'duration': 279, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', + 'info_dict': { + 'id': '771852', + 'ext': 'mp4', + 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', + 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', + 'duration': 3096, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', + 'info_dict': { + 'id': '51499', + 'ext': 'flv', + 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', + 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', + }, + 'skip': 'Translation has finished', + }, + { + 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/', + 'info_dict': { + 'id': '21', + 'ext': 'mp4', + 'title': 're:^Россия 24. Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'is_live': True, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, + { + 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/', + 'only_matching': True, + }, + ] + + @classmethod + def _extract_url(cls, webpage): + mobj = re.search( + r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', webpage) + if mobj: + return mobj.group('url') + + mobj = re.search( + r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + video_path = mobj.group('path') + + if re.match(r'flash\d+v', video_path): + video_type = 'video' + elif video_path.startswith('iframe'): + video_type = mobj.group('type') + if video_type == 'swf': + video_type = 'video' + elif video_path.startswith('index/iframe/cast_id'): + video_type = 'live' + + is_live = video_type == 'live' + + json_data = self._download_json( + 'http://player.rutv.ru/iframe/data%s/id/%s' % ('live' if is_live else 'video', video_id), + video_id, 'Downloading JSON') + + if json_data['errors']: + raise ExtractorError('%s said: %s' % (self.IE_NAME, json_data['errors']), expected=True) + + playlist = json_data['data']['playlist'] + medialist = playlist['medialist'] + media = medialist[0] + + if media['errors']: + raise ExtractorError('%s said: %s' % (self.IE_NAME, media['errors']), expected=True) + + view_count = playlist.get('count_views') + priority_transport = playlist['priority_transport'] + + thumbnail = media['picture'] + width = int_or_none(media['width']) + height = int_or_none(media['height']) + description = media['anons'] + title = media['title'] + duration = int_or_none(media.get('duration')) + + formats = [] + + for transport, links in media['sources'].items(): + for quality, url in links.items(): + preference = -1 if priority_transport == transport else -2 + if transport == 'rtmp': + mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) + if not mobj: + continue + fmt = { + 'url': mobj.group('url'), + 'play_path': mobj.group('playpath'), + 'app': mobj.group('app'), + 'page_url': 'http://player.rutv.ru', + 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22', + 'rtmp_live': True, + 'ext': 'flv', + 'vbr': int(quality), + 'preference': preference, + } + elif transport == 'm3u8': + formats.extend(self._extract_m3u8_formats( + url, video_id, 'mp4', preference=preference, m3u8_id='hls')) + continue + else: + fmt = { + 'url': url + } + fmt.update({ + 'width': width, + 'height': height, + 'format_id': '%s-%s' % (transport, quality), + })
+ formats.append(fmt) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'description': description, + 'thumbnail': thumbnail, + 'view_count': view_count, + 'duration': duration, + 'formats': formats, + 'is_live': is_live, + } diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dl/extractor/ruutu.py new file mode 100644 index 000000000..f984040aa --- /dev/null +++ b/youtube_dl/extractor/ruutu.py @@ -0,0 +1,153 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse +from ..utils import ( + determine_ext, + ExtractorError, + int_or_none, + xpath_attr, + xpath_text, +) + + +class RuutuIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla)/(?P<id>\d+)' + _TESTS = [ + { + 'url': 'http://www.ruutu.fi/video/2058907', + 'md5': 'ab2093f39be1ca8581963451b3c0234f', + 'info_dict': { + 'id': '2058907', + 'ext': 'mp4', + 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', + 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 114, + 'age_limit': 0, + }, + }, + { + 'url': 'http://www.ruutu.fi/video/2057306', + 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9', + 'info_dict': { + 'id': '2057306', + 'ext': 'mp4', + 'title': 'Superpesis: katso koko kausi Ruudussa', + 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 40, + 'age_limit': 0, + }, + }, + { + 'url': 'http://www.supla.fi/supla/2231370', + 'md5': 'df14e782d49a2c0df03d3be2a54ef949', + 'info_dict': { + 'id': '2231370', + 'ext': 'mp4', + 'title': 'Osa 1: Mikael Jungner', + 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + }, + }, + # Episode where <SourceFile> is "NOT-USED", but has other + # downloadable sources available.
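+ # (extract_formats() below skips any source URL containing "NOT_USED" or "NOT-USED")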
+ { + 'url': 'http://www.ruutu.fi/video/3193728', + 'only_matching': True, + }, + { + # audio podcast + 'url': 'https://www.supla.fi/supla/3382410', + 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', + 'info_dict': { + 'id': '3382410', + 'ext': 'mp3', + 'title': 'Mikä ihmeen poltergeist?', + 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 0, + }, + 'expected_warnings': ['HTTP Error 502: Bad Gateway'], + } + ] + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_xml = self._download_xml( + 'https://gatling.nelonenmedia.fi/media-xml-cache', video_id, + query={'id': video_id}) + + formats = [] + processed_urls = [] + + def extract_formats(node): + for child in node: + if child.tag.endswith('Files'): + extract_formats(child) + elif child.tag.endswith('File'): + video_url = child.text + if (not video_url or video_url in processed_urls + or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))): + continue + processed_urls.append(video_url) + ext = determine_ext(video_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) + elif ext == 'f4m': + formats.extend(self._extract_f4m_formats( + video_url, video_id, f4m_id='hds', fatal=False)) + elif ext == 'mpd': + # video-only and audio-only streams are of different + # duration resulting in out of sync issue + continue + formats.extend(self._extract_mpd_formats( + video_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp3' or child.tag == 'AudioMediaFile': + formats.append({ + 'format_id': 'audio', + 'url': video_url, + 'vcodec': 'none', + }) + else: + proto = compat_urllib_parse_urlparse(video_url).scheme + if not child.tag.startswith('HTTP') and proto != 'rtmp': + continue + preference = -1 if proto == 'rtmp' else 1 + label = child.get('label') + tbr = int_or_none(child.get('bitrate')) + format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto + if not self._is_valid_url(video_url, video_id, format_id): + continue + width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]] + formats.append({ + 'format_id': format_id, + 'url': video_url, + 'width': width, + 'height': height, + 'tbr': tbr, + 'preference': preference, + }) + + extract_formats(video_xml.find('./Clip')) + + drm = xpath_text(video_xml, './Clip/DRM', default=None) + if not formats and drm: + raise ExtractorError('This video is DRM protected.', expected=True) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True), + 'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'), + 'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'), + 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')), + 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/ruv.py b/youtube_dl/extractor/ruv.py new file mode 100644 index 000000000..8f3cc4095 --- /dev/null +++ b/youtube_dl/extractor/ruv.py @@ -0,0 +1,101 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + unified_timestamp, +) + + +class RuvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)' + _TESTS = [{ + # m3u8 + 'url':
'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516', + 'md5': '66347652f4e13e71936817102acc1724', + 'info_dict': { + 'id': '1144499', + 'display_id': 'fh-valur/20170516', + 'ext': 'mp4', + 'title': 'FH - Valur', + 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.', + 'timestamp': 1494963600, + 'upload_date': '20170516', + }, + }, { + # mp3 + 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619', + 'md5': '395ea250c8a13e5fdb39d4670ef85378', + 'info_dict': { + 'id': '1153630', + 'display_id': 'morgunutvarpid/20170619', + 'ext': 'mp3', + 'title': 'Morgunútvarpið', + 'description': 'md5:a4cf1202c0a1645ca096b06525915418', + 'timestamp': 1497855000, + 'upload_date': '20170619', + }, + }, { + 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', + 'only_matching': True, + }, { + 'url': 'http://www.ruv.is/node/1151854', + 'only_matching': True, + }, { + 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun', + 'only_matching': True, + }, { + 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + title = self._og_search_title(webpage) + + FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1' + + media_url = self._html_search_regex( + FIELD_RE % 'src', webpage, 'video URL', group='url') + + video_id = self._search_regex( + r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)', + webpage, 'video id', default=display_id) + + ext = determine_ext(media_url) + + if ext == 'm3u8': + formats = self._extract_m3u8_formats( + media_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + elif ext == 'mp3': + formats = [{ + 'format_id': 'mp3', + 'url': media_url, + 'vcodec': 'none', + }] + else: + formats = [{ + 'url': media_url, + }] + + description = self._og_search_description(webpage, default=None) + thumbnail = self._og_search_thumbnail( + webpage, default=None) or self._search_regex( + FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False) + timestamp = unified_timestamp(self._html_search_meta( + 'article:published_time', webpage, 'timestamp', fatal=False)) + + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'formats': formats, + } diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py new file mode 100644 index 000000000..8d4806794 --- /dev/null +++ b/youtube_dl/extractor/safari.py @@ -0,0 +1,263 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import re + +from .common import InfoExtractor + +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + update_url_query, +) + + +class SafariBaseIE(InfoExtractor): + _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/' + _NETRC_MACHINE = 'safari' + + _API_BASE = 'https://learning.oreilly.com/api/v1' + _API_FORMAT = 'json' + + LOGGED_IN = False + + def _real_initialize(self): + self._login() + + def _login(self): + username, password = self._get_login_info() + if username is None: + return + + _, urlh = self._download_webpage_handle( + 'https://learning.oreilly.com/accounts/login-check/', None, + 'Downloading login page') + + def is_logged(urlh): + return 'learning.oreilly.com/home/' in compat_str(urlh.geturl()) + + if is_logged(urlh): + self.LOGGED_IN = True + return + + redirect_url =
compat_str(urlh.geturl()) + parsed_url = compat_urlparse.urlparse(redirect_url) + qs = compat_parse_qs(parsed_url.query) + next_uri = compat_urlparse.urljoin( + 'https://api.oreilly.com', qs['next'][0]) + + auth, urlh = self._download_json_handle( + 'https://www.oreilly.com/member/auth/login/', None, 'Logging in', + data=json.dumps({ + 'email': username, + 'password': password, + 'redirect_uri': next_uri, + }).encode(), headers={ + 'Content-Type': 'application/json', + 'Referer': redirect_url, + }, expected_status=400) + + credentials = auth.get('credentials') + if (not auth.get('logged_in') and not auth.get('redirect_uri') + and credentials): + raise ExtractorError( + 'Unable to login: %s' % credentials, expected=True) + + # oreilly serves two same groot_sessionid cookies in Set-Cookie header + # and expects first one to be actually set + self._apply_first_set_cookie_header(urlh, 'groot_sessionid') + + _, urlh = self._download_webpage_handle( + auth.get('redirect_uri') or next_uri, None, 'Completing login',) + + if is_logged(urlh): + self.LOGGED_IN = True + return + + raise ExtractorError('Unable to log in') + + +class SafariIE(SafariBaseIE): + IE_NAME = 'safari' + IE_DESC = 'safaribooksonline.com online video' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ + (?: + library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html| + videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+) + ) + ''' + + _TESTS = [{ + 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', + 'md5': 'dcc5a425e79f2564148652616af1f2a3', + 'info_dict': { + 'id': '0_qbqx90ic', + 'ext': 'mp4', + 'title': 'Introduction to Hadoop Fundamentals LiveLessons', + 'timestamp': 1437758058, + 'upload_date': '20150724', + 'uploader_id': 'stork', + }, + }, { + # non-digits in course id + 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', + 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', + 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00', + 'only_matching': True, + }, { + 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', + 'only_matching': True, + }, { + 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html', + 'only_matching': True, + }] + + _PARTNER_ID = '1926081' + _UICONF_ID = '29375172' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + reference_id = mobj.group('reference_id') + if reference_id: + video_id = reference_id + partner_id = self._PARTNER_ID + ui_id = self._UICONF_ID + else: + video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part')) + + webpage, urlh = self._download_webpage_handle(url, video_id) + + mobj = re.match(self._VALID_URL, urlh.geturl()) + reference_id = mobj.group('reference_id') + if not reference_id: + reference_id = self._search_regex( + r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura reference id', group='id') + partner_id = self._search_regex( + r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura widget id', default=self._PARTNER_ID, + group='id') + ui_id = self._search_regex( + r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1', + webpage, 'kaltura uiconf id', default=self._UICONF_ID, +
group='id') + + query = { + 'wid': '_%s' % partner_id, + 'uiconf_id': ui_id, + 'flashvars[referenceId]': reference_id, + } + + if self.LOGGED_IN: + kaltura_session = self._download_json( + '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id), + video_id, 'Downloading kaltura session JSON', + 'Unable to download kaltura session JSON', fatal=False) + if kaltura_session: + session = kaltura_session.get('session') + if session: + query['flashvars[ks]'] = session + + return self.url_result(update_url_query( + 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query), + 'Kaltura') + + +class SafariApiIE(SafariBaseIE): + IE_NAME = 'safari:api' + _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' + + _TESTS = [{ + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', + 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + part = self._download_json( + url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')), + 'Downloading part JSON') + return self.url_result(part['web_url'], SafariIE.ie_key()) + + +class SafariCourseIE(SafariBaseIE): + IE_NAME = 'safari:course' + IE_DESC = 'safaribooksonline.com online courses' + + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ + (?: + library/view/[^/]+| + api/v1/book| + videos/[^/]+ + )| + techbus\.safaribooksonline\.com + ) + /(?P<id>[^/]+) + ''' + + _TESTS = [{ + 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', + 'info_dict': { + 'id': '9780133392838', + 'title': 'Hadoop Fundamentals LiveLessons', + }, + 'playlist_count': 22, + 'skip': 'Requires safaribooksonline account credentials', + }, { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', + 'only_matching': True, + }, { + 'url': 'http://techbus.safaribooksonline.com/9780134426365', + 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314', + 'only_matching': True, + }, { + 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', + 'only_matching': True, + }, { + 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url) + else super(SafariCourseIE, cls).suitable(url)) + + def _real_extract(self, url): + course_id = self._match_id(url) + + course_json = self._download_json( + '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT), + course_id, 'Downloading course JSON') + + if 'chapters' not in course_json: + raise ExtractorError( + 'No chapters found for course %s' % course_id, expected=True) + + entries = [ + self.url_result(chapter, SafariApiIE.ie_key()) + for chapter in course_json['chapters']] + + course_title = course_json['title'] + + return self.playlist_result(entries, course_id, course_title) diff --git a/youtube_dl/extractor/sapo.py b/youtube_dl/extractor/sapo.py new file mode 100644 index 000000000..49a9b313a --- /dev/null +++ b/youtube_dl/extractor/sapo.py @@ -0,0 +1,119 @@ +# coding:
utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + unified_strdate, +) + + +class SapoIE(InfoExtractor): + IE_DESC = 'SAPO Vídeos' + _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})' + + _TESTS = [ + { + 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi', + 'md5': '79ee523f6ecb9233ac25075dee0eda83', + 'note': 'SD video', + 'info_dict': { + 'id': 'UBz95kOtiWYUMTA5Ghfi', + 'ext': 'mp4', + 'title': 'Benfica - Marcas na Hitória', + 'description': 'md5:c9082000a128c3fd57bf0299e1367f22', + 'duration': 264, + 'uploader': 'tiago_1988', + 'upload_date': '20080229', + 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'], + }, + }, + { + 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF', + 'md5': '90a2f283cfb49193fe06e861613a72aa', + 'note': 'HD video', + 'info_dict': { + 'id': 'IyusNAZ791ZdoCY5H5IF', + 'ext': 'mp4', + 'title': 'Codebits VII - Report', + 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8', + 'duration': 144, + 'uploader': 'codebits', + 'upload_date': '20140427', + 'categories': ['codebits', 'codebits2014'], + }, + }, + { + 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz', + 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac', + 'note': 'v2 video', + 'info_dict': { + 'id': 'yLqjzPtbTimsn2wWBKHz', + 'ext': 'mp4', + 'title': 'Hipnose Condicionativa 4', + 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40', + 'duration': 692, + 'uploader': 'sapozen', + 'upload_date': '20090609', + 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'], + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + item = self._download_xml( + 'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item') + + title = item.find('./title').text + description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text + thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url') + duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text) + uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text + upload_date = unified_strdate(item.find('./pubDate').text) + view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text) + comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text) + tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text + categories = tags.split() if tags else [] + age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0 + + video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text + video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x') + + formats = [{ + 'url': video_url, + 'ext': 'mp4', + 'format_id': 'sd', + 'width': int(video_size[0]), + 'height': int(video_size[1]), + }] + + if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true': + formats.append({ + 'url': re.sub(r'/mov/1$', '/mov/39', video_url), + 'ext': 'mp4', + 'format_id': 'hd', + 'width': 1280, + 'height': 720, + }) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': uploader, + 'upload_date': upload_date, + 'view_count': view_count, + 'comment_count': comment_count, + 'categories': categories, + 'age_limit': age_limit, + 'formats':
formats, + } diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dl/extractor/savefrom.py new file mode 100644 index 000000000..21e44b69a --- /dev/null +++ b/youtube_dl/extractor/savefrom.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import os.path +import re + +from .common import InfoExtractor + + +class SaveFromIE(InfoExtractor): + IE_NAME = 'savefrom.net' + _VALID_URL = r'https?://[^.]+\.savefrom\.net/\#url=(?P<url>.*)$' + + _TEST = { + 'url': 'http://en.savefrom.net/#url=http://youtube.com/watch?v=UlVRAPW2WJY&utm_source=youtube.com&utm_medium=short_domains&utm_campaign=ssyoutube.com', + 'info_dict': { + 'id': 'UlVRAPW2WJY', + 'ext': 'mp4', + 'title': 'About Team Radical MMA | MMA Fighting', + 'upload_date': '20120816', + 'uploader': 'Howcast', + 'uploader_id': 'Howcast', + 'description': r're:(?s).* Hi, my name is Rene Dreifuss\. And I\'m here to show you some MMA.*', + }, + 'params': { + 'skip_download': True + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = os.path.splitext(url.split('/')[-1])[0] + + return self.url_result(mobj.group('url'), video_id=video_id) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py new file mode 100644 index 000000000..0e623ff7b --- /dev/null +++ b/youtube_dl/extractor/sbs.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + smuggle_url, + ExtractorError, +) + + +class SBSIE(InfoExtractor): + IE_DESC = 'sbs.com.au' + _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand|news)/video/(?:single/)?(?P<id>[0-9]+)' + + _TESTS = [{ + # Original URL is handled by the generic IE which finds the iframe: + # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation + 'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed', + 'md5': '3150cf278965eeabb5b4cea1c963fe0a', + 'info_dict': { + 'id': '320403011771', + 'ext': 'mp4', + 'title': 'Dingo Conservation (The Feed)', + 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', + 'thumbnail': r're:http://.*\.jpg', + 'duration': 308, + 'timestamp': 1408613220, + 'upload_date': '20140821', + 'uploader': 'SBSC', + }, + }, { + 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', + 'only_matching': True, + }, { + 'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + player_params = self._download_json( + 'http://www.sbs.com.au/api/video_pdkvars/id/%s?form=json' % video_id, video_id) + + error = player_params.get('error') + if error: + error_message = 'Sorry, The video you are looking for does not exist.' + video_data = error.get('results') or {} + error_code = error.get('errorCode') + if error_code == 'ComingSoon': + error_message = '%s is not yet available.' % video_data.get('title', '') + elif error_code in ('Forbidden', 'intranetAccessOnly'): + error_message = 'Sorry, This video cannot be accessed via this website' + elif error_code == 'Expired': + error_message = 'Sorry, %s is no longer available.'
% video_data.get('title', '') + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) + + urls = player_params['releaseUrls'] + theplatform_url = (urls.get('progressive') or urls.get('html') + or urls.get('standard') or player_params['relatedItemsURL']) + + return { + '_type': 'url_transparent', + 'ie_key': 'ThePlatform', + 'id': video_id, + 'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}), + } diff --git a/youtube_dl/extractor/screencast.py b/youtube_dl/extractor/screencast.py new file mode 100644 index 000000000..69a0d01f3 --- /dev/null +++ b/youtube_dl/extractor/screencast.py @@ -0,0 +1,123 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urllib_request, +) +from ..utils import ( + ExtractorError, +) + + +class ScreencastIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'http://www.screencast.com/t/3ZEjQXlT', + 'md5': '917df1c13798a3e96211dd1561fded83', + 'info_dict': { + 'id': '3ZEjQXlT', + 'ext': 'm4v', + 'title': 'Color Measurement with Ocean Optics Spectrometers', + 'description': 'md5:240369cde69d8bed61349a199c5fb153', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/V2uXehPJa1ZI', + 'md5': 'e8e4b375a7660a9e7e35c33973410d34', + 'info_dict': { + 'id': 'V2uXehPJa1ZI', + 'ext': 'mov', + 'title': 'The Amadeus Spectrometer', + 'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/aAB3iowa', + 'md5': 'dedb2734ed00c9755761ccaee88527cd', + 'info_dict': { + 'id': 'aAB3iowa', + 'ext': 'mp4', + 'title': 'Google Earth Export', + 'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://www.screencast.com/t/X3ddTrYh', + 'md5': '669ee55ff9c51988b4ebc0877cc8b159', + 'info_dict': { + 'id': 'X3ddTrYh', + 'ext': 'wmv', + 'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression', + 'description': 'md5:7b9f393bc92af02326a5c5889639eab0', + 'thumbnail': r're:^https?://.*\.(?:gif|jpg)$', + } + }, { + 'url': 'http://screencast.com/t/aAB3iowa', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_url = self._html_search_regex( + r'<embed\b[^>]+\bsrc=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video url', default=None, group='url') + + if video_url is None: + video_url = self._html_search_meta( + 'og:video', webpage, default=None) + + if video_url is None: + raise ExtractorError('Cannot find video') + + title = self._og_search_title(webpage, default=None) + if title is None: + title = self._html_search_regex( + [r'<b>Title:</b> ([^<]+)</div>',
r'class="tabSeperator">></span><span>(.+?)</span>',
r'<title>([^<]+)</title>'], + webpage, 'title') + thumbnail = self._og_search_thumbnail(webpage) + description = self._og_search_description(webpage, default=None) + if description is None: + description = self._html_search_meta('description', webpage) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dl/extractor/screencastomatic.py new file mode 100644 index 000000000..b5e76c9af --- /dev/null +++ b/youtube_dl/extractor/screencastomatic.py @@ -0,0 +1,37 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import js_to_json + + +class ScreencastOMaticIE(InfoExtractor): + _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)' + _TEST = { + 'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl', + 'md5': '483583cb80d92588f15ccbedd90f0c18', + 'info_dict': { + 'id': 'c2lD3BeOPl', + 'ext': 'mp4', + 'title': 'Welcome to 3-4 Philosophy @ DECV!', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.', + 'duration': 369.163, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + jwplayer_data = self._parse_json( + self._search_regex( + r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'), + video_id, transform_source=js_to_json) + + info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False) + info_dict.update({ + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + }) + return info_dict diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py new file mode 100644 index 000000000..8b3275735 --- /dev/null +++ b/youtube_dl/extractor/scrippsnetworks.py @@ -0,0 +1,104 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import hashlib +import re + +from .aws import AWSIE +from .anvato import AnvatoIE +from ..utils import ( + smuggle_url, + urlencode_postdata, + xpath_text, +) + + +class ScrippsNetworksWatchIE(AWSIE): + IE_NAME = 'scrippsnetworks:watch' + _VALID_URL = r'''(?x) + https?:// + watch\.
+ (?P<site>geniuskitchen)\.com/ + (?: + player\.[A-Z0-9]+\.html\#| + show/(?:[^/]+/){2}| + player/ + ) + (?P<id>\d+) + ''' + _TESTS = [{ + 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/', + 'info_dict': { + 'id': '4194875', + 'ext': 'mp4', + 'title': 'Ample Hills Ice Cream Bike', + 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.', + 'uploader': 'ANV', + 'upload_date': '20171011', + 'timestamp': 1507698000, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [AnvatoIE.ie_key()], + }] + + _SNI_TABLE = { + 'geniuskitchen': 'genius', + } + + _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1' + _AWS_PROXY_HOST = 'web.api.video.snidigital.com' + + _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + site_id, video_id = mobj.group('site', 'id') + + aws_identity_id_json = json.dumps({ + 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION + }).encode('utf-8') + token = self._download_json( + 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id, + data=aws_identity_id_json, + headers={ + 'Accept': '*/*', + 'Content-Type': 'application/x-amz-json-1.1', + 'Referer': url, + 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(), + 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken', + 'X-Amz-User-Agent': self._AWS_USER_AGENT, + })['Token'] + + sts = self._download_xml( + 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({ + 'Action': 'AssumeRoleWithWebIdentity', + 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role', + 'RoleSessionName': 'web-identity', + 'Version': '2011-06-15', + 'WebIdentityToken': token, + }), headers={ + 'Referer': url, + 'X-Amz-User-Agent': self._AWS_USER_AGENT, + 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', + }) + + def get(key): + return xpath_text( + sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key, + fatal=True) + + mcp_id = self._aws_execute_api({ + 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id), + 'access_key': get('AccessKeyId'), + 'secret_key': get('SecretAccessKey'), + 'session_token': get('SessionToken'), + }, video_id)['results'][0]['mcpId'] + + return self.url_result( + smuggle_url( + 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, + {'geo_countries': ['US']}), + AnvatoIE.ie_key(), video_id=mcp_id) diff --git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py new file mode 100644 index 000000000..3b9c65e7e --- /dev/null +++ b/youtube_dl/extractor/seeker.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SeekerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html' + _TESTS = [{ + # player.loadRevision3Item + 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', + 'md5': '30c1dc4030cc715cf05b423d0947ac18', + 'info_dict': { + 'id': '76243', + 'ext': 'webm', + 'title': 'Should Trump Be Required To Release His Tax Returns?', + 'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns.
So what can we learn if he decides to release them?', + 'uploader': 'Seeker Daily', + 'uploader_id': 'seekerdaily', + } + }, { + 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', + 'playlist': [ + { + 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', + 'info_dict': { + 'id': '67558', + 'ext': 'mp4', + 'title': 'The Pros & Cons Of Zoos', + 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', + 'uploader': 'DNews', + 'uploader_id': 'dnews', + }, + } + ], + 'info_dict': { + 'id': '1834116536', + 'title': 'After Gorilla Killing, Changes Ahead for Zoos', + 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.', + }, + }] + + def _real_extract(self, url): + display_id, article_id = re.match(self._VALID_URL, url).groups() + webpage = self._download_webpage(url, display_id) + mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage) + if mobj: + playlist_type, playlist_id = mobj.groups() + return self.url_result( + 'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id) + else: + entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall( + r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)] + return self.playlist_result( + entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage)) diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py new file mode 100644 index 000000000..db5ef8b57 --- /dev/null +++ b/youtube_dl/extractor/senateisvp.py @@ -0,0 +1,153 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + unsmuggle_url, +) +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) + + +class SenateISVPIE(InfoExtractor): + _COMM_MAP = [ + ['ag', '76440', 'http://ag-f.akamaihd.net'], + ['aging', '76442', 'http://aging-f.akamaihd.net'], + ['approps', '76441', 'http://approps-f.akamaihd.net'], + ['armed', '76445', 'http://armed-f.akamaihd.net'], + ['banking', '76446', 'http://banking-f.akamaihd.net'], + ['budget', '76447', 'http://budget-f.akamaihd.net'], + ['cecc', '76486', 'http://srs-f.akamaihd.net'], + ['commerce', '80177', 'http://commerce1-f.akamaihd.net'], + ['csce', '75229', 'http://srs-f.akamaihd.net'], + ['dpc', '76590', 'http://dpc-f.akamaihd.net'], + ['energy', '76448', 'http://energy-f.akamaihd.net'], + ['epw', '76478', 'http://epw-f.akamaihd.net'], + ['ethics', '76449', 'http://ethics-f.akamaihd.net'], + ['finance', '76450', 'http://finance-f.akamaihd.net'], + ['foreign', '76451', 'http://foreign-f.akamaihd.net'], + ['govtaff', '76453', 'http://govtaff-f.akamaihd.net'], + ['help', '76452', 'http://help-f.akamaihd.net'], + ['indian', '76455', 'http://indian-f.akamaihd.net'], + ['intel', '76456', 'http://intel-f.akamaihd.net'], + ['intlnarc', '76457', 'http://intlnarc-f.akamaihd.net'], + ['jccic', '85180', 'http://jccic-f.akamaihd.net'], + ['jec', '76458', 'http://jec-f.akamaihd.net'], + ['judiciary', '76459', 'http://judiciary-f.akamaihd.net'], + ['rpc', '76591', 'http://rpc-f.akamaihd.net'], + ['rules', '76460', 'http://rules-f.akamaihd.net'], + ['saa', '76489', 'http://srs-f.akamaihd.net'], + ['smbiz', '76461', 'http://smbiz-f.akamaihd.net'], + ['srs',
'75229', 'http://srs-f.akamaihd.net'], + ['uscc', '76487', 'http://srs-f.akamaihd.net'], + ['vetaff', '76462', 'http://vetaff-f.akamaihd.net'], + ['arch', '', 'http://ussenate-f.akamaihd.net/'] + ] + _IE_NAME = 'senate.gov' + _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' + _TESTS = [{ + 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', + 'info_dict': { + 'id': 'judiciary031715', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', + 'info_dict': { + 'id': 'commerce011514', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi', + # checksum differs each time + 'info_dict': { + 'id': 'intel090613', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + } + }, { + # From http://www.c-span.org/video/?96791-1 + 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', + 'only_matching': True, + }] + + @staticmethod + def _search_iframe_url(webpage): + mobj = re.search( + r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", + webpage) + if mobj: + return mobj.group('url') + + def _get_info_for_comm(self, committee): + for entry in self._COMM_MAP: + if entry[0] == committee: + return entry[1:] + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + qs = compat_parse_qs(re.match(self._VALID_URL, url).group('qs')) + if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): + raise ExtractorError('Invalid URL', expected=True) + + video_id = re.sub(r'.mp4$', '', qs['filename'][0]) + + webpage = self._download_webpage(url, video_id) + + if smuggled_data.get('force_title'): + title = smuggled_data['force_title'] + else: + title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, video_id) + poster = qs.get('poster') + thumbnail = poster[0] if poster else None + + video_type = qs['type'][0] + committee = video_type if video_type == 'arch' else qs['comm'][0] + stream_num, domain = self._get_info_for_comm(committee) + + formats = [] + if video_type == 'arch': + filename = video_id if '.' in video_id else video_id + '.mp4' + formats = [{ + # All parameters in the query string are necessary to prevent a 403 error + 'url': compat_urlparse.urljoin(domain, filename) + '?v=3.1.0&fp=&r=&g=', + }] + else: + hdcore_sign = 'hdcore=3.1.0' + url_params = (domain, video_id, stream_num) + f4m_url = '%s/z/%s_1@%s/manifest.f4m?'
% url_params + hdcore_sign + m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params + for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): + # URLs without the extra param induce an 404 error + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.append(entry) + for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'): + mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url']) + if mobj: + entry['format_id'] += mobj.group('tag') + formats.append(entry) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dl/extractor/sendtonews.py new file mode 100644 index 000000000..9d9652949 --- /dev/null +++ b/youtube_dl/extractor/sendtonews.py @@ -0,0 +1,105 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + float_or_none, + parse_iso8601, + update_url_query, + int_or_none, + determine_protocol, + unescapeHTML, +) + + +class SendtoNewsIE(InfoExtractor): + _VALID_URL = r'https?://embed\.sendtonews\.com/player2/embedplayer\.php\?.*\bSC=(?P<id>[0-9A-Za-z-]+)' + + _TEST = { + # From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/ + 'url': 'http://embed.sendtonews.com/player2/embedplayer.php?SC=GxfCe0Zo7D-175909-5588&type=single&autoplay=on&sound=YES', + 'info_dict': { + 'id': 'GxfCe0Zo7D-175909-5588' + }, + 'playlist_count': 8, + # test the first video only to prevent lengthy tests + 'playlist': [{ + 'info_dict': { + 'id': '240385', + 'ext': 'mp4', + 'title': 'Indians introduce Encarnacion', + 'description': 'Indians president of baseball operations Chris Antonetti and Edwin Encarnacion discuss the slugger\'s three-year contract with Cleveland', + 'duration': 137.898, + 'thumbnail': r're:https?://.*\.jpg$', + 'upload_date': '20170105', + 'timestamp': 1483649762, + }, + }], + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + _URL_TEMPLATE = '//embed.sendtonews.com/player2/embedplayer.php?SC=%s' + + @classmethod + def _extract_url(cls, webpage): + mobj = re.search(r'''(?x)<script[^>]+src=([\'"]) + (?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
+ .*\bSC=(?P<SC>[0-9a-zA-Z-]+).* + \1>''', webpage) + if mobj: + sc = mobj.group('SC') + return cls._URL_TEMPLATE % sc + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + data_url = update_url_query( + url.replace('embedplayer.php', 'data_read.php'), + {'cmd': 'loadInitial'}) + playlist_data = self._download_json(data_url, playlist_id) + + entries = [] + for video in playlist_data['playlistData'][0]: + info_dict = self._parse_jwplayer_data( + video['jwconfiguration'], + require_title=False, m3u8_id='hls', rtmp_params={'no_resume': True}) + + for f in info_dict['formats']: + if f.get('tbr'): + continue + tbr = int_or_none(self._search_regex( + r'/(\d+)k/', f['url'], 'bitrate', default=None)) + if not tbr: + continue + f.update({ + 'format_id': '%s-%d' % (determine_protocol(f), tbr), + 'tbr': tbr, + }) + self._sort_formats(info_dict['formats'], ('tbr', 'height', 'width', 'format_id')) + + thumbnails = [] + if video.get('thumbnailUrl'): + thumbnails.append({ + 'id': 'normal', + 'url': video['thumbnailUrl'], + }) + if video.get('smThumbnailUrl'): + thumbnails.append({ + 'id': 'small', + 'url': video['smThumbnailUrl'], + }) + info_dict.update({ + 'title': video['S_headLine'].strip(), + 'description': unescapeHTML(video.get('S_fullStory')), + 'thumbnails': thumbnails, + 'duration': float_or_none(video.get('SM_length')), + 'timestamp': parse_iso8601(video.get('S_sysDate'), delimiter=' '), + }) + entries.append(info_dict) + + return self.playlist_result(entries, playlist_id) diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py new file mode 100644 index 000000000..c013d678f --- /dev/null +++ b/youtube_dl/extractor/servingsys.py @@ -0,0 +1,72 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, +) + + +class ServingSysIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)' + + _TEST = { + 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?', + 'info_dict': { + 'id': '5349193', + 'title': 'AdAPPter_Hyundai_demo', + }, + 'playlist': [{ + 'md5': 'baed851342df6846eb8677a60a011a0f', + 'info_dict': { + 'id': '29955898', + 'ext': 'flv', + 'title': 'AdAPPter_Hyundai_demo (1)', + 'duration': 74, + 'tbr': 1378, + 'width': 640, + 'height': 400, + }, + }, { + 'md5': '979b4da2655c4bc2d81aeb915a8c5014', + 'info_dict': { + 'id': '29907998', + 'ext': 'flv', + 'title': 'AdAPPter_Hyundai_demo (2)', + 'duration': 34, + 'width': 854, + 'height': 480, + 'tbr': 516, + }, + }], + 'params': { + 'playlistend': 2, + }, + '_skip': 'Blocked in the US [sic]', + } + + def _real_extract(self, url): + pl_id = self._match_id(url) + vast_doc = self._download_xml(url, pl_id) + + title = vast_doc.find('.//AdTitle').text + media = vast_doc.find('.//MediaFile').text + info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL') + + doc = self._download_xml(info_url, pl_id, 'Downloading video info') + entries = [{ + '_type': 'video', + 'id': a.attrib['id'], + 'title': '%s (%s)' % (title, a.attrib['assetID']), + 'url': a.attrib['URL'], + 'duration': int_or_none(a.attrib.get('length')), + 'tbr': int_or_none(a.attrib.get('bitrate')), + 'height': int_or_none(a.attrib.get('height')), + 'width': int_or_none(a.attrib.get('width')), + } for a in doc.findall('.//AdditionalAssets/asset')] + + return { + '_type': 'playlist', + 'id': pl_id, + 'title': title, + 'entries': entries, + }
diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py new file mode 100644 index 000000000..e579d42cf --- /dev/null +++ b/youtube_dl/extractor/servus.py @@ -0,0 +1,56 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class ServusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)' + _TESTS = [{ + 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/', + 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4', + 'info_dict': { + 'id': 'AA-1T6VBU5PW1W12', + 'ext': 'mp4', + 'title': 'Die Grünen aus Sicht des Volkes', + 'description': 'md5:1247204d85783afe3682644398ff2ec4', + 'thumbnail': r're:^https?://.*\.jpg', + } + }, { + 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/', + 'only_matching': True, + }, { + 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/', + 'only_matching': True, + }, { + 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url).upper() + webpage = self._download_webpage(url, video_id) + + title = self._search_regex( + (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', + r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'), + webpage, 'title', default=None, + group='title') or self._og_search_title(webpage) + title = re.sub(r'\s*-\s*Servus TV\s*$', '', title) + description = self._og_search_description(webpage) + thumbnail = self._og_search_thumbnail(webpage) + + formats = self._extract_m3u8_formats( + 'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id, + video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'formats': formats, + } diff --git a/youtube_dl/extractor/sevenplus.py b/youtube_dl/extractor/sevenplus.py new file mode 100644 index 000000000..84568ac69 --- /dev/null +++ b/youtube_dl/extractor/sevenplus.py @@ -0,0 +1,84 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .brightcove import BrightcoveNewIE +from ..compat import compat_str +from ..utils import ( + try_get, + update_url_query, +) + + +class SevenPlusIE(BrightcoveNewIE): + IE_NAME = '7plus' + _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))' + _TESTS = [{ + 'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003', + 'info_dict': { + 'id': 'MTYS7-003', + 'ext': 'mp4', + 'title': 'S7 E3 - Wind Surf', + 'description': 'md5:29c6a69f21accda7601278f81b46483d', + 'uploader_id': '5303576322001', + 'upload_date': '20171201', + 'timestamp': 1512106377, + 'series': 'Mighty Ships', + 'season_number': 7, + 'episode_number': 3, + 'episode': 'Wind Surf', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + } + }, { + 'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001', + 'only_matching': True, + }] + + def _real_extract(self, url): + path, episode_id = re.match(self._VALID_URL, url).groups() + + media = self._download_json( + 'https://videoservice.swm.digital/playback', episode_id, query={ + 'appId': '7plus', + 'deviceType': 'web', + 'platformType': 'web', + 'accountId': 5303576322001, + 'referenceId': 'ref:' + episode_id, + 'deliveryId': 'csai', + 'videoType': 'vod', + })['media'] + + for source in media.get('sources',
{}): + src = source.get('src') + if not src: + continue + source['src'] = update_url_query(src, {'rule': ''}) + + info = self._parse_brightcove_metadata(media, episode_id) + + content = self._download_json( + 'https://component-cdn.swm.digital/content/' + path, + episode_id, headers={ + 'market-id': 4, + }, fatal=False) or {} + for item in content.get('items', {}): + if item.get('componentData', {}).get('componentType') == 'infoPanel': + for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]: + value = item.get(src_key) + if value: + info[dst_key] = value + info['series'] = try_get( + item, lambda x: x['seriesLogo']['name'], compat_str) + mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title']) + if mobj: + info.update({ + 'season_number': int(mobj.group(1)), + 'episode_number': int(mobj.group(2)), + 'episode': mobj.group(3), + }) + + return info diff --git a/youtube_dl/extractor/sexu.py b/youtube_dl/extractor/sexu.py new file mode 100644 index 000000000..3df51520b --- /dev/null +++ b/youtube_dl/extractor/sexu.py @@ -0,0 +1,63 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class SexuIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?sexu\.com/(?P<id>\d+)' + _TEST = { + 'url': 'http://sexu.com/961791/', + 'md5': 'ff615aca9691053c94f8f10d96cd7884', + 'info_dict': { + 'id': '961791', + 'ext': 'mp4', + 'title': 'md5:4d05a19a5fc049a63dbbaf05fb71d91b', + 'description': 'md5:2b75327061310a3afb3fbd7d09e2e403', + 'categories': list, # NSFW + 'thumbnail': r're:https?://.*\.jpg$', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + jwvideo = self._parse_json( + self._search_regex(r'\.setup\(\s*({.+?})\s*\);', webpage, 'jwvideo'), + video_id) + + sources = jwvideo['sources'] + + formats = [{ + 'url': source['file'].replace('\\', ''), + 'format_id': source.get('label'), + 'height': int(self._search_regex( + r'^(\d+)[pP]', source.get('label', ''), 'height', + default=None)), + } for source in sources if source.get('file')] + self._sort_formats(formats) + + title = self._html_search_regex( + r'<title>([^<]+)\s*-\s*Sexu\.Com', webpage, 'title') + + description = self._html_search_meta( + 'description', webpage, 'description') + + thumbnail = jwvideo.get('image') + + categories_str = self._html_search_meta( + 'keywords', webpage, 'categories') + categories = ( + None if categories_str is None + else categories_str.split(',')) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'categories': categories, + 'formats': formats, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dl/extractor/seznamzpravy.py new file mode 100644 index 000000000..7a1c7e38b --- /dev/null +++ b/youtube_dl/extractor/seznamzpravy.py @@ -0,0 +1,169 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urllib_parse_urlparse, +) +from ..utils import ( + urljoin, + int_or_none, + parse_codecs, + try_get, +) + + +def _raw_id(src_url): + return compat_urllib_parse_urlparse(src_url).path.split('/')[-1] + + +class SeznamZpravyIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc=' + _TESTS = [{ + 'url': 
'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy', + 'info_dict': { + 'id': '170889', + 'ext': 'mp4', + 'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'duration': 241, + 'series': 'Svět bez obalu', + }, + 'params': { + 'skip_download': True, + }, + }, { + # with Location key + 'url': 'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5&sectionPrefixPostroll=%2Fzpravy%2Fvyzva&regression=false', + 'info_dict': { + 'id': '185688', + 'ext': 'mp4', + 'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu', + 'thumbnail': r're:^https?://.*\.jpe?g', + 'series': 'Výzva', + }, + 'params': { + 'skip_download': True, + }, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') for mobj in re.finditer( + r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1', + webpage)] + + def _extract_sdn_formats(self, sdn_url, video_id): + sdn_data = self._download_json(sdn_url, video_id) + + if sdn_data.get('Location'): + sdn_url = sdn_data['Location'] + sdn_data = self._download_json(sdn_url, video_id) + + formats = [] + mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {} + for format_id, format_data in mp4_formats.items(): + relative_url = format_data.get('url') + if not relative_url: + continue + + try: + width, height = format_data.get('resolution') + except (TypeError, ValueError):
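+ # 'resolution' may be absent or not a two-item (width, height) pair; fall back to unknown dimensions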
width, height = None, None + + f = { + 'url': urljoin(sdn_url, relative_url), + 'format_id': 'http-%s' % format_id, + 'tbr': int_or_none(format_data.get('bandwidth'), scale=1000), + 'width': int_or_none(width), + 'height': int_or_none(height), + } + f.update(parse_codecs(format_data.get('codec'))) + formats.append(f) + + pls = sdn_data.get('pls', {}) + + def get_url(format_id): + return try_get(pls, lambda x: x[format_id]['url'], compat_str) + + dash_rel_url = get_url('dash') + if dash_rel_url: + formats.extend(self._extract_mpd_formats( + urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash', + fatal=False)) + + hls_rel_url = get_url('hls') + if hls_rel_url: + formats.extend(self._extract_m3u8_formats( + urljoin(sdn_url, hls_rel_url), video_id, ext='mp4', + m3u8_id='hls', fatal=False)) + + self._sort_formats(formats) + return formats + + def _real_extract(self, url): + params = compat_parse_qs(compat_urllib_parse_urlparse(url).query) + + src = params['src'][0] + title = params['title'][0] + video_id = params.get('contentId', [_raw_id(src)])[0] + formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id) + + duration = int_or_none(params.get('duration', [None])[0]) + series = params.get('series', [None])[0] + thumbnail = params.get('poster', [None])[0] + + return { + 'id': video_id, + 'title': title, + 'thumbnail': thumbnail, + 'duration': duration, + 'series': series, + 'formats': formats, + } + + +class SeznamZpravyArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P<id>\d+)' + _API_URL = 'https://apizpravy.seznam.cz/' + + _TESTS = [{ + # two videos on one page, with SDN URL + 'url': 'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990', + 'info_dict': { + 'id': '35990', + 'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2', + 'description': 'md5:933f7b06fa337a814ba199d3596d27ba', + }, + 'playlist_count': 2, + }, { + # video with live stream URL + 'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489', + 'info_dict': { + 'id': '38489', + 'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60', + 'description': 'md5:428e7926a1a81986ec7eb23078004fb4', + }, + 'playlist_count': 1, + }] + + def _real_extract(self, url): + article_id = self._match_id(url) + + webpage = self._download_webpage(url, article_id) + + info = self._search_json_ld(webpage, article_id, default={}) + + title = info.get('title') or self._og_search_title(webpage, fatal=False) + description = info.get('description') or self._og_search_description(webpage) + + return self.playlist_result([ + self.url_result(entry_url, ie=SeznamZpravyIE.ie_key()) + for entry_url in SeznamZpravyIE._extract_urls(webpage)], + article_id, title, description) diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py new file mode 100644 index 000000000..5c2a6206b --- /dev/null +++ b/youtube_dl/extractor/shahid.py @@ -0,0 +1,215 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json +import math +import re + +from .aws import AWSIE +from ..compat import compat_HTTPError +from ..utils import ( + clean_html, + ExtractorError, + InAdvancePagedList, + int_or_none, + parse_iso8601, + str_or_none, + urlencode_postdata, +) + + +class ShahidBaseIE(AWSIE): + _AWS_PROXY_HOST = 'api2.shahid.net' + _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh' + + def _handle_error(self, e): + fail_data =
+        fail_data = self._parse_json(
+            e.cause.read().decode('utf-8'), None, fatal=False)
+        if fail_data:
+            faults = fail_data.get('faults', [])
+            faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
+            if faults_message:
+                raise ExtractorError(faults_message, expected=True)
+
+    def _call_api(self, path, video_id, request=None):
+        query = {}
+        if request:
+            query['request'] = json.dumps(request)
+        try:
+            return self._aws_execute_api({
+                'uri': '/proxy/v2/' + path,
+                'access_key': 'AKIAI6X4TYCIXM2B7MUQ',
+                'secret_key': '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',
+            }, video_id, query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                self._handle_error(e)
+            raise
+
+
+class ShahidIE(ShahidBaseIE):
+    _NETRC_MACHINE = 'shahid'
+    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
+        'info_dict': {
+            'id': '275286',
+            'ext': 'mp4',
+            'title': 'مجلس الشباب الموسم 1 كليب 1',
+            'timestamp': 1506988800,
+            'upload_date': '20171003',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746',
+        'only_matching': True
+    }, {
+        # shahid plus subscriber only
+        'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
+        'only_matching': True
+    }]
+
+    def _real_initialize(self):
+        email, password = self._get_login_info()
+        if email is None:
+            return
+
+        try:
+            user_data = self._download_json(
+                'https://shahid.mbc.net/wd/service/users/login',
+                None, 'Logging in', data=json.dumps({
+                    'email': email,
+                    'password': password,
+                    'basic': 'false',
+                }).encode('utf-8'), headers={
+                    'Content-Type': 'application/json; charset=UTF-8',
+                })['user']
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                self._handle_error(e)
+            raise
+
+        self._download_webpage(
+            'https://shahid.mbc.net/populateContext',
+            None, 'Populate Context', data=urlencode_postdata({
+                'firstName': user_data['firstName'],
+                'lastName': user_data['lastName'],
+                'userName': user_data['email'],
+                'csg_user_name': user_data['email'],
+                'subscriberId': user_data['id'],
+                'sessionId': user_data['sessionId'],
+            }))
+
+    def _real_extract(self, url):
+        page_type, video_id = re.match(self._VALID_URL, url).groups()
+        if page_type == 'clip':
+            page_type = 'episode'
+
+        playout = self._call_api(
+            'playout/url/' + video_id, video_id)['playout']
+
+        if playout.get('drm'):
+            raise ExtractorError('This video is DRM protected.', expected=True)
+
+        formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
+        self._sort_formats(formats)
+
+        # video = self._call_api(
+        #     'product/id', video_id, {
+        #         'id': video_id,
+        #         'productType': 'ASSET',
+        #         'productSubType': page_type.upper()
+        #     })['productModel']
+
+        response = self._download_json(
+            'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
+            video_id, 'Downloading video JSON', query={
+                'apiKey': 'sh@hid0nlin3',
+                'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
+            })
+        data = response.get('data', {})
+        error = data.get('error')
+        if error:
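+            # the API reports errors as a dict of messages; join and surface them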
+            raise ExtractorError(
+                '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
+                expected=True)
+
+        video = data[page_type]
+        title = video['title']
+        categories = [
+            category['name']
+            for category in video.get('genres', []) if 'name' in category]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': video.get('thumbnailUrl'),
+            'duration': int_or_none(video.get('duration')),
+            'timestamp': parse_iso8601(video.get('referenceDate')),
+            'categories': categories,
+            'series': video.get('showTitle') or video.get('showName'),
+            'season': video.get('seasonTitle'),
+            'season_number': int_or_none(video.get('seasonNumber')),
+            'season_id': str_or_none(video.get('seasonId')),
+            'episode_number': int_or_none(video.get('number')),
+            'episode_id': video_id,
+            'formats': formats,
+        }
+
+
+class ShahidShowIE(ShahidBaseIE):
+    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
+        'info_dict': {
+            'id': '79187',
+            'title': 'رامز قرش البحر',
+            'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff',
+        },
+        'playlist_mincount': 32,
+    }, {
+        'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861',
+        'only_matching': True
+    }]
+    _PAGE_SIZE = 30
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+
+        product = self._call_api(
+            'playableAsset', show_id, {'showId': show_id})['productModel']
+        playlist = product['playlist']
+        playlist_id = playlist['id']
+        show = product.get('show', {})
+
+        def page_func(page_num):
+            playlist = self._call_api(
+                'product/playlist', show_id, {
+                    'playListId': playlist_id,
+                    'pageNumber': page_num,
+                    'pageSize': 30,
+                    'sorts': [{
+                        'order': 'DESC',
+                        'type': 'SORTDATE'
+                    }],
+                })
+            for product in playlist.get('productList', {}).get('products', []):
+                product_url = product.get('productUrl', {}).get('url')
+                if not product_url:
+                    continue
+                yield self.url_result(
+                    product_url, 'Shahid',
+                    str_or_none(product.get('id')),
+                    product.get('title'))
+
+        entries = InAdvancePagedList(
+            page_func,
+            math.ceil(playlist['count'] / self._PAGE_SIZE),
+            self._PAGE_SIZE)
+
+        return self.playlist_result(
+            entries, show_id, show.get('title'), show.get('description'))
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
new file mode 100644
index 000000000..ff575f592
--- /dev/null
+++ b/youtube_dl/extractor/shared.py
@@ -0,0 +1,127 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_b64decode
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    int_or_none,
+    KNOWN_EXTENSIONS,
+    parse_filesize,
+    url_or_none,
+    urlencode_postdata,
+)
+
+
+class SharedBaseIE(InfoExtractor):
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage, urlh = self._download_webpage_handle(url, video_id)
+
+        if self._FILE_NOT_FOUND in webpage:
+            raise ExtractorError(
+                'Video %s does not exist' % video_id, expected=True)
+
+        video_url = self._extract_video_url(webpage, video_id, url)
+
+        title = self._extract_title(webpage)
+        filesize = int_or_none(self._extract_filesize(webpage))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'filesize': filesize,
+            'title': title,
+        }
+
+    def _extract_title(self, webpage):
+        return compat_b64decode(self._html_search_meta(
+            'full:title', webpage, 'title')).decode('utf-8')
+
+    def _extract_filesize(self, webpage):
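+        # the file size (in bytes) is published in a 'full:size' meta tag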
+        return self._html_search_meta(
+            'full:size', webpage, 'file size', fatal=False)
+
+
+class SharedIE(SharedBaseIE):
+    IE_DESC = 'shared.sx'
+    _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
+    _FILE_NOT_FOUND = '>File does not exist<'
+
+    _TEST = {
+        'url': 'http://shared.sx/0060718775',
+        'md5': '106fefed92a8a2adb8c98e6a0652f49b',
+        'info_dict': {
+            'id': '0060718775',
+            'ext': 'mp4',
+            'title': 'Bmp4',
+            'filesize': 1720110,
+        },
+    }
+
+    def _extract_video_url(self, webpage, video_id, url):
+        download_form = self._hidden_inputs(webpage)
+
+        video_page = self._download_webpage(
+            url, video_id, 'Downloading video page',
+            data=urlencode_postdata(download_form),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+                'Referer': url,
+            })
+
+        video_url = self._html_search_regex(
+            r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            video_page, 'video URL', group='url')
+
+        return video_url
+
+
+class VivoIE(SharedBaseIE):
+    IE_DESC = 'vivo.sx'
+    _VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
+    _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
+
+    _TEST = {
+        'url': 'http://vivo.sx/d7ddda0e78',
+        'md5': '15b3af41be0b4fe01f4df075c2678b2c',
+        'info_dict': {
+            'id': 'd7ddda0e78',
+            'ext': 'mp4',
+            'title': 'Chicken',
+            'filesize': 515659,
+        },
+    }
+
+    def _extract_title(self, webpage):
+        title = self._html_search_regex(
+            r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
+            'title', default=None, group='title')
+        if title:
+            ext = determine_ext(title)
+            if ext.lower() in KNOWN_EXTENSIONS:
+                title = title.rpartition('.' + ext)[0]
+            return title
+        return self._og_search_title(webpage)
+
+    def _extract_filesize(self, webpage):
+        return parse_filesize(self._search_regex(
+            r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
+            webpage, 'filesize', fatal=False))
+
+    def _extract_video_url(self, webpage, video_id, url):
+        def decode_url(encoded_url):
+            return compat_b64decode(encoded_url).decode('utf-8')
+
+        stream_url = url_or_none(decode_url(self._search_regex(
+            r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+            'stream url', default=None, group='url')))
+        if stream_url:
+            return stream_url
+        return self._parse_json(
+            self._search_regex(
+                r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+                webpage, 'stream', group='url'),
+            video_id, transform_source=decode_url)[0]
diff --git a/youtube_dl/extractor/showroomlive.py b/youtube_dl/extractor/showroomlive.py
new file mode 100644
index 000000000..efd9d561f
--- /dev/null
+++ b/youtube_dl/extractor/showroomlive.py
@@ -0,0 +1,84 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    urljoin,
+)
+
+
+class ShowRoomLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)'
+    _TEST = {
+        'url': 'https://www.showroom-live.com/48_Nana_Okada',
+        'only_matching': True,
+    }
+
+    def _real_extract(self, url):
+        broadcaster_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, broadcaster_id)
+
+        room_id = self._search_regex(
+            (r'SrGlobal\.roomId\s*=\s*(\d+)',
+             r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id')
+
+        room = self._download_json(
+            urljoin(url, '/api/room/profile?room_id=%s' % room_id),
+            broadcaster_id)
+
+        is_live = room.get('is_onlive')
+        if is_live is not True:
+            raise ExtractorError('%s is offline' % broadcaster_id, expected=True)
+
+        
uploader = room.get('performer_name') or broadcaster_id + title = room.get('room_name') or room.get('main_name') or uploader + + streaming_url_list = self._download_json( + urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id), + broadcaster_id)['streaming_url_list'] + + formats = [] + for stream in streaming_url_list: + stream_url = stream.get('url') + if not stream_url: + continue + stream_type = stream.get('type') + if stream_type == 'hls': + m3u8_formats = self._extract_m3u8_formats( + stream_url, broadcaster_id, ext='mp4', m3u8_id='hls', + live=True) + for f in m3u8_formats: + f['quality'] = int_or_none(stream.get('quality', 100)) + formats.extend(m3u8_formats) + elif stream_type == 'rtmp': + stream_name = stream.get('stream_name') + if not stream_name: + continue + formats.append({ + 'url': stream_url, + 'play_path': stream_name, + 'page_url': url, + 'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf', + 'rtmp_live': True, + 'ext': 'flv', + 'format_id': 'rtmp', + 'format_note': stream.get('label'), + 'quality': int_or_none(stream.get('quality', 100)), + }) + self._sort_formats(formats) + + return { + 'id': compat_str(room.get('live_id') or broadcaster_id), + 'title': self._live_title(title), + 'description': room.get('description'), + 'timestamp': int_or_none(room.get('current_live_started_at')), + 'uploader': uploader, + 'uploader_id': broadcaster_id, + 'view_count': int_or_none(room.get('view_num')), + 'formats': formats, + 'is_live': True, + } diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py new file mode 100644 index 000000000..07b766b4a --- /dev/null +++ b/youtube_dl/extractor/sina.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + ExtractorError, + int_or_none, + update_url_query, + qualities, + get_element_by_attribute, + clean_html, +) + + +class SinaIE(InfoExtractor): + _VALID_URL = r'''(?x)https?://(?:.*?\.)?video\.sina\.com\.cn/ + (?: + (?:view/|.*\#)(?P<video_id>\d+)| + .+?/(?P<pseudo_id>[^/?#]+)(?:\.s?html)| + # This is used by external sites like Weibo + api/sinawebApi/outplay.php/(?P<token>.+?)\.swf + ) + ''' + + _TESTS = [ + { + 'url': 'http://video.sina.com.cn/news/spj/topvideoes20160504/?opsubject_id=top1#250576622', + 'md5': 'd38433e2fc886007729735650ae4b3e9', + 'info_dict': { + 'id': '250576622', + 'ext': 'mp4', + 'title': '现场:克鲁兹宣布退选 特朗普将稳获提名', + } + }, + { + 'url': 'http://video.sina.com.cn/v/b/101314253-1290078633.html', + 'info_dict': { + 'id': '101314253', + 'ext': 'flv', + 'title': '军方提高对朝情报监视级别', + }, + 'skip': 'the page does not exist or has been deleted', + }, + { + 'url': 'http://video.sina.com.cn/view/250587748.html', + 'md5': '3d1807a25c775092aab3bc157fff49b4', + 'info_dict': { + 'id': '250587748', + 'ext': 'mp4', + 'title': '瞬间泪目:8年前汶川地震珍贵视频首曝光', + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + video_id = mobj.group('video_id') + if not video_id: + if mobj.group('token') is not None: + # The video id is in the redirected url + self.to_screen('Getting video id') + request = HEADRequest(url) + _, urlh = self._download_webpage_handle(request, 'NA', False) + return self._real_extract(urlh.geturl()) + else: + pseudo_id = mobj.group('pseudo_id') + webpage = self._download_webpage(url, pseudo_id) + error = get_element_by_attribute('class', 'errtitle', webpage) + if error: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, 
clean_html(error)), expected=True) + video_id = self._search_regex( + r"video_id\s*:\s*'(\d+)'", webpage, 'video id') + + video_data = self._download_json( + 'http://s.video.sina.com.cn/video/h5play', + video_id, query={'video_id': video_id}) + if video_data['code'] != 1: + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, video_data['message']), expected=True) + else: + video_data = video_data['data'] + title = video_data['title'] + description = video_data.get('description') + if description: + description = description.strip() + + preference = qualities(['cif', 'sd', 'hd', 'fhd', 'ffd']) + formats = [] + for quality_id, quality in video_data.get('videos', {}).get('mp4', {}).items(): + file_api = quality.get('file_api') + file_id = quality.get('file_id') + if not file_api or not file_id: + continue + formats.append({ + 'format_id': quality_id, + 'url': update_url_query(file_api, {'vid': file_id}), + 'preference': preference(quality_id), + 'ext': 'mp4', + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': video_data.get('image'), + 'duration': int_or_none(video_data.get('length')), + 'timestamp': int_or_none(video_data.get('create_time')), + 'formats': formats, + } diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dl/extractor/sixplay.py new file mode 100644 index 000000000..7ec66ecf3 --- /dev/null +++ b/youtube_dl/extractor/sixplay.py @@ -0,0 +1,129 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_str, + compat_urllib_parse_urlparse, +) +from ..utils import ( + determine_ext, + int_or_none, + try_get, + qualities, +) + + +class SixPlayIE(InfoExtractor): + IE_NAME = '6play' + _VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr|rtlmost\.hu)/.+?-c_)(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051', + 'md5': '31fcd112637baa0c2ab92c4fcd8baf27', + 'info_dict': { + 'id': '12041051', + 'ext': 'mp4', + 'title': 'Le but qui a marqué l\'histoire du football français !', + 'description': 'md5:b59e7e841d646ef1eb42a7868eb6a851', + }, + }, { + 'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869', + 'only_matching': True, + }, { + 'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989', + 'only_matching': True, + }, { + 'url': 'https://www.rtlmost.hu/megtorve-p_14167/megtorve-6-resz-c_12397787', + 'only_matching': True, + }] + + def _real_extract(self, url): + domain, video_id = re.search(self._VALID_URL, url).groups() + service, consumer_name = { + '6play.fr': ('6play', 'm6web'), + 'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'), + 'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'), + 'rtlmost.hu': ('rtlhu_rtl_most', 'rtlhu'), + }.get(domain, ('6play', 'm6web')) + + data = self._download_json( + 'https://pc.middleware.6play.fr/6play/v2/platforms/m6group_web/services/%s/videos/clip_%s' % (service, video_id), + video_id, headers={ + 'x-customer-name': consumer_name + }, query={ + 'csa': 5, + 'with': 'clips', + }) + + clip_data = data['clips'][0] + title = clip_data['title'] + + urls = [] + quality_key = qualities(['lq', 'sd', 'hq', 'hd']) + formats = [] + subtitles = {} + assets = clip_data.get('assets') or [] + for asset in assets: + asset_url = asset.get('full_physical_path') + protocol = 
asset.get('protocol')
+            if not asset_url or ((protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264') and not ('_drmnp.ism/' in asset_url or '_unpnp.ism/' in asset_url)) or asset_url in urls:
+                continue
+            urls.append(asset_url)
+            container = asset.get('video_container')
+            ext = determine_ext(asset_url)
+            if protocol == 'http_subtitle' or ext == 'vtt':
+                subtitles.setdefault('fr', []).append({'url': asset_url})
+                continue
+            if container == 'm3u8' or ext == 'm3u8':
+                if protocol == 'usp':
+                    if compat_parse_qs(compat_urllib_parse_urlparse(asset_url).query).get('token', [None])[0]:
+                        urlh = self._request_webpage(
+                            asset_url, video_id, fatal=False,
+                            headers=self.geo_verification_headers())
+                        if not urlh:
+                            continue
+                        asset_url = urlh.geturl()
+                    asset_url = asset_url.replace('_drmnp.ism/', '_unpnp.ism/')
+                    for i in range(3, 0, -1):
+                        asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
+                        m3u8_formats = self._extract_m3u8_formats(
+                            asset_url, video_id, 'mp4', 'm3u8_native',
+                            m3u8_id='hls', fatal=False)
+                        formats.extend(m3u8_formats)
+                        formats.extend(self._extract_mpd_formats(
+                            asset_url.replace('.m3u8', '.mpd'),
+                            video_id, mpd_id='dash', fatal=False))
+                        if m3u8_formats:
+                            break
+                else:
+                    formats.extend(self._extract_m3u8_formats(
+                        asset_url, video_id, 'mp4', 'm3u8_native',
+                        m3u8_id='hls', fatal=False))
+            elif container == 'mp4' or ext == 'mp4':
+                quality = asset.get('video_quality')
+                formats.append({
+                    'url': asset_url,
+                    'format_id': quality,
+                    'quality': quality_key(quality),
+                    'ext': ext,
+                })
+        self._sort_formats(formats)
+
+        def get(getter):
+            for src in (data, clip_data):
+                v = try_get(src, getter, compat_str)
+                if v:
+                    return v
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': get(lambda x: x['description']),
+            'duration': int_or_none(clip_data.get('duration')),
+            'series': get(lambda x: x['program']['title']),
+            'formats': formats,
+            'subtitles': subtitles,
+        }
diff --git a/youtube_dl/extractor/sky.py b/youtube_dl/extractor/sky.py
new file mode 100644
index 000000000..ea30d6e62
--- /dev/null
+++ b/youtube_dl/extractor/sky.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    smuggle_url,
+    strip_or_none,
+    urljoin,
+)
+
+
+class SkyBaseIE(InfoExtractor):
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_data = extract_attributes(self._search_regex(
+            r'(<div.+?class="[^"]*sdc-article-video__media-ooyala[^"]*"[^>]+>)',
+            webpage, 'video data'))
+
+        video_url = 'ooyala:%s' % video_data['data-video-id']
+        if video_data.get('data-token-required') == 'true':
+            token_fetch_options = self._parse_json(video_data.get(
+                'data-token-fetch-options', '{}'), video_id, fatal=False) or {}
+            token_fetch_url = token_fetch_options.get('url')
+            if token_fetch_url:
+                embed_token = self._download_webpage(urljoin(
+                    url, token_fetch_url), video_id, fatal=False)
+                if embed_token:
+                    video_url = smuggle_url(
+                        video_url, {'embed_token': embed_token.strip('"')})
+
+        return {
+            '_type': 'url_transparent',
+            'id': video_id,
+            'url': video_url,
+            'title': self._og_search_title(webpage),
+            'description': strip_or_none(self._og_search_description(webpage)),
+            'ie_key': 'Ooyala',
+        }
+
+
+class SkySportsIE(SkyBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine',
+        
'md5': '77d59166cddc8d3cb7b13e35eaf0f5ec', + 'info_dict': { + 'id': 'o3eWJnNDE6l7kfNO8BOoBlRxXRQ4ANNQ', + 'ext': 'mp4', + 'title': 'Bale: It\'s our time to shine', + 'description': 'md5:e88bda94ae15f7720c5cb467e777bb6d', + }, + 'add_ie': ['Ooyala'], + } + + +class SkyNewsIE(SkyBaseIE): + _VALID_URL = r'https?://news\.sky\.com/video/[0-9a-z-]+-(?P<id>[0-9]+)' + _TEST = { + 'url': 'https://news.sky.com/video/russian-plane-inspected-after-deadly-fire-11712962', + 'md5': 'd6327e581473cea9976a3236ded370cd', + 'info_dict': { + 'id': '1ua21xaDE6lCtZDmbYfl8kwsKLooJbNM', + 'ext': 'mp4', + 'title': 'Russian plane inspected after deadly fire', + 'description': 'The Russian Investigative Committee has released video of the wreckage of a passenger plane which caught fire near Moscow.', + }, + 'add_ie': ['Ooyala'], + } diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dl/extractor/skylinewebcams.py new file mode 100644 index 000000000..b7f8ac736 --- /dev/null +++ b/youtube_dl/extractor/skylinewebcams.py @@ -0,0 +1,42 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class SkylineWebcamsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?skylinewebcams\.com/[^/]+/webcam/(?:[^/]+/)+(?P<id>[^/]+)\.html' + _TEST = { + 'url': 'https://www.skylinewebcams.com/it/webcam/italia/lazio/roma/scalinata-piazza-di-spagna-barcaccia.html', + 'info_dict': { + 'id': 'scalinata-piazza-di-spagna-barcaccia', + 'ext': 'mp4', + 'title': 're:^Live Webcam Scalinata di Piazza di Spagna - La Barcaccia [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + 'description': 'Roma, veduta sulla Scalinata di Piazza di Spagna e sulla Barcaccia', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + stream_url = self._search_regex( + r'(?:url|source)\s*:\s*(["\'])(?P<url>(?:https?:)?//.+?\.m3u8.*?)\1', webpage, + 'stream url', group='url') + + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + return { + 'id': video_id, + 'url': stream_url, + 'ext': 'mp4', + 'title': self._live_title(title), + 'description': description, + 'is_live': True, + } diff --git a/youtube_dl/extractor/skynewsarabia.py b/youtube_dl/extractor/skynewsarabia.py new file mode 100644 index 000000000..fffc9aa22 --- /dev/null +++ b/youtube_dl/extractor/skynewsarabia.py @@ -0,0 +1,117 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + parse_iso8601, + parse_duration, +) + + +class SkyNewsArabiaBaseIE(InfoExtractor): + _IMAGE_BASE_URL = 'http://www.skynewsarabia.com/web/images' + + def _call_api(self, path, value): + return self._download_json('http://api.skynewsarabia.com/web/rest/v2/%s/%s.json' % (path, value), value) + + def _get_limelight_media_id(self, url): + return self._search_regex(r'/media/[^/]+/([a-z0-9]{32})', url, 'limelight media id') + + def _get_image_url(self, image_path_template, width='1600', height='1200'): + return self._IMAGE_BASE_URL + image_path_template.format(width=width, height=height) + + def _extract_video_info(self, video_data): + video_id = compat_str(video_data['id']) + topic = video_data.get('topicTitle') + return { + '_type': 'url_transparent', + 'url': 'limelight:media:%s' % self._get_limelight_media_id(video_data['videoUrl'][0]['url']), + 'id': video_id, + 'title': video_data['headline'], + 
'description': video_data.get('summary'), + 'thumbnail': self._get_image_url(video_data['mediaAsset']['imageUrl']), + 'timestamp': parse_iso8601(video_data.get('date')), + 'duration': parse_duration(video_data.get('runTime')), + 'tags': video_data.get('tags', []), + 'categories': [topic] if topic else [], + 'webpage_url': 'http://www.skynewsarabia.com/web/video/%s' % video_id, + 'ie_key': 'LimelightMedia', + } + + +class SkyNewsArabiaIE(SkyNewsArabiaBaseIE): + IE_NAME = 'skynewsarabia:video' + _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/video/(?P<id>[0-9]+)' + _TEST = { + 'url': 'http://www.skynewsarabia.com/web/video/794902/%D9%86%D8%B5%D9%81-%D9%85%D9%84%D9%8A%D9%88%D9%86-%D9%85%D8%B5%D8%A8%D8%A7%D8%AD-%D8%B4%D8%AC%D8%B1%D8%A9-%D9%83%D8%B1%D9%8A%D8%B3%D9%85%D8%A7%D8%B3', + 'info_dict': { + 'id': '794902', + 'ext': 'flv', + 'title': 'نصف مليون مصباح على شجرة كريسماس', + 'description': 'md5:22f1b27f0850eeb10c7e59b1f16eb7c6', + 'upload_date': '20151128', + 'timestamp': 1448697198, + 'duration': 2119, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._call_api('video', video_id) + return self._extract_video_info(video_data) + + +class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE): + IE_NAME = 'skynewsarabia:article' + _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9', + 'info_dict': { + 'id': '794549', + 'ext': 'flv', + 'title': 'بالفيديو.. 
ألعاب ذكية تحاكي واقع المنطقة', + 'description': 'md5:0c373d29919a851e080ee4edd0c5d97f', + 'upload_date': '20151126', + 'timestamp': 1448559336, + 'duration': 281.6, + }, + 'params': { + # rtmp download + 'skip_download': True, + }, + }, { + 'url': 'http://www.skynewsarabia.com/web/article/794844/%D8%A7%D8%B3%D8%AA%D9%87%D8%AF%D8%A7%D9%81-%D9%82%D9%88%D8%A7%D8%B1%D8%A8-%D8%A7%D9%94%D8%B3%D9%84%D8%AD%D8%A9-%D9%84%D9%85%D9%8A%D9%84%D9%8A%D8%B4%D9%8A%D8%A7%D8%AA-%D8%A7%D9%84%D8%AD%D9%88%D8%AB%D9%8A-%D9%88%D8%B5%D8%A7%D9%84%D8%AD', + 'info_dict': { + 'id': '794844', + 'title': 'إحباط تهريب أسلحة لميليشيات الحوثي وصالح بجنوب اليمن', + 'description': 'md5:5c927b8b2e805796e7f693538d96fc7e', + }, + 'playlist_mincount': 2, + }] + + def _real_extract(self, url): + article_id = self._match_id(url) + article_data = self._call_api('article', article_id) + media_asset = article_data['mediaAsset'] + if media_asset['type'] == 'VIDEO': + topic = article_data.get('topicTitle') + return { + '_type': 'url_transparent', + 'url': 'limelight:media:%s' % self._get_limelight_media_id(media_asset['videoUrl'][0]['url']), + 'id': article_id, + 'title': article_data['headline'], + 'description': article_data.get('summary'), + 'thumbnail': self._get_image_url(media_asset['imageUrl']), + 'timestamp': parse_iso8601(article_data.get('date')), + 'tags': article_data.get('tags', []), + 'categories': [topic] if topic else [], + 'webpage_url': url, + 'ie_key': 'LimelightMedia', + } + entries = [self._extract_video_info(item) for item in article_data.get('inlineItems', []) if item['type'] == 'VIDEO'] + return self.playlist_result(entries, article_id, article_data['headline'], article_data.get('summary')) diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py new file mode 100644 index 000000000..e89ebebe7 --- /dev/null +++ b/youtube_dl/extractor/slideshare.py @@ -0,0 +1,56 @@ +from __future__ import unicode_literals + +import re +import json + +from .common import InfoExtractor +from ..compat import ( + compat_urlparse, +) +from ..utils import ( + ExtractorError, + get_element_by_id, +) + + +class SlideshareIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' + + _TEST = { + 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', + 'info_dict': { + 'id': '25665706', + 'ext': 'mp4', + 'title': 'Managing Scale and Complexity', + 'description': 'This was a keynote presentation at the NoSQL Now! 2013 Conference & Expo (http://www.nosqlnow.com). This presentation was given by Adrian Cockcroft from Netflix.', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + page_title = mobj.group('title') + webpage = self._download_webpage(url, page_title) + slideshare_obj = self._search_regex( + r'\$\.extend\(.*?slideshare_object,\s*(\{.*?\})\);', + webpage, 'slideshare object') + info = json.loads(slideshare_obj) + if info['slideshow']['type'] != 'video': + raise ExtractorError('Webpage type is "%s": only video extraction is supported for Slideshare' % info['slideshow']['type'], expected=True) + + doc = info['doc'] + bucket = info['jsplayer']['video_bucket'] + ext = info['jsplayer']['video_extension'] + video_url = compat_urlparse.urljoin(bucket, doc + '-SD.' 
+ ext) + description = get_element_by_id('slideshow-description-paragraph', webpage) or self._html_search_regex( + r'(?s)<p[^>]+itemprop="description"[^>]*>(.+?)</p>', webpage, + 'description', fatal=False) + + return { + '_type': 'video', + 'id': info['slideshow']['id'], + 'title': info['slideshow']['title'], + 'ext': ext, + 'url': video_url, + 'thumbnail': info['slideshow']['pin_image_url'], + 'description': description.strip() if description else None, + } diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py new file mode 100644 index 000000000..ed84322c5 --- /dev/null +++ b/youtube_dl/extractor/slideslive.py @@ -0,0 +1,39 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class SlidesLiveIE(InfoExtractor): + _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)' + _TESTS = [{ + # video_service_name = YOUTUBE + 'url': 'https://slideslive.com/38902413/gcc-ia16-backend', + 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f', + 'info_dict': { + 'id': 'LMtgR8ba0b0', + 'ext': 'mp4', + 'title': '38902413: external video', + 'description': '3890241320170925-9-1yd6ech.mp4', + 'uploader': 'SlidesLive Administrator', + 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw', + 'upload_date': '20170925', + } + }, { + # video_service_name = youtube + 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + video_data = self._download_json( + url, video_id, headers={'Accept': 'application/json'}) + service_name = video_data['video_service_name'].lower() + if service_name == 'youtube': + yt_video_id = video_data['video_service_id'] + return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id) + else: + raise ExtractorError( + 'Unsupported service name: {0}'.format(service_name), expected=True) diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py new file mode 100644 index 000000000..661f9e59d --- /dev/null +++ b/youtube_dl/extractor/slutload.py @@ -0,0 +1,65 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class SlutloadIE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', + 'md5': '868309628ba00fd488cf516a113fd717', + 'info_dict': { + 'id': 'TD73btpBqSxc', + 'ext': 'mp4', + 'title': 'virginie baisee en cam', + 'age_limit': 18, + 'thumbnail': r're:https?://.*?\.jpg' + }, + }, { + # mobile site + 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/', + 'only_matching': True, + }, { + 'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/', + 'only_matching': True, + }, { + 'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + embed_page = self._download_webpage( + 'http://www.slutload.com/embed_player/%s' % video_id, video_id, + 'Downloading embed page', fatal=False) + + if embed_page: + def extract(what): + return self._html_search_regex( + r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what, + embed_page, 'video %s' % what, default=None, group='url') + + video_url = extract('url') + if video_url: + title = self._html_search_regex( + r'<title>([^<]+)', embed_page, 'title', default=video_id) 
+ return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': extract('preview'), + 'age_limit': 18 + } + + webpage = self._download_webpage( + 'http://www.slutload.com/video/_/%s/' % video_id, video_id) + title = self._html_search_regex( + r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip() + info = self._parse_html5_media_entries(url, webpage, video_id)[0] + info.update({ + 'id': video_id, + 'title': title, + 'age_limit': 18, + }) + return info diff --git a/youtube_dl/extractor/smotri.py b/youtube_dl/extractor/smotri.py new file mode 100644 index 000000000..45995f30f --- /dev/null +++ b/youtube_dl/extractor/smotri.py @@ -0,0 +1,416 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import json +import hashlib +import uuid + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + sanitized_Request, + unified_strdate, + urlencode_postdata, + xpath_text, +) + + +class SmotriIE(InfoExtractor): + IE_DESC = 'Smotri.com' + IE_NAME = 'smotri' + _VALID_URL = r'https?://(?:www\.)?(?:smotri\.com/video/view/\?id=|pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=)(?P<id>v(?P<realvideoid>[0-9]+)[a-z0-9]{4})' + _NETRC_MACHINE = 'smotri' + + _TESTS = [ + # real video id 2610366 + { + 'url': 'http://smotri.com/video/view/?id=v261036632ab', + 'md5': '02c0dfab2102984e9c5bb585cc7cc321', + 'info_dict': { + 'id': 'v261036632ab', + 'ext': 'mp4', + 'title': 'катастрофа с камер видеонаблюдения', + 'uploader': 'rbc2008', + 'uploader_id': 'rbc08', + 'upload_date': '20131118', + 'thumbnail': 'http://frame6.loadup.ru/8b/a9/2610366.3.3.jpg', + }, + }, + # real video id 57591 + { + 'url': 'http://smotri.com/video/view/?id=v57591cb20', + 'md5': '830266dfc21f077eac5afd1883091bcd', + 'info_dict': { + 'id': 'v57591cb20', + 'ext': 'flv', + 'title': 'test', + 'uploader': 'Support Photofile@photofile', + 'uploader_id': 'support-photofile', + 'upload_date': '20070704', + 'thumbnail': 'http://frame4.loadup.ru/03/ed/57591.2.3.jpg', + }, + }, + # video-password, not approved by moderator + { + 'url': 'http://smotri.com/video/view/?id=v1390466a13c', + 'md5': 'f6331cef33cad65a0815ee482a54440b', + 'info_dict': { + 'id': 'v1390466a13c', + 'ext': 'mp4', + 'title': 'TOCCA_A_NOI_-_LE_COSE_NON_VANNO_CAMBIAMOLE_ORA-1', + 'uploader': 'timoxa40', + 'uploader_id': 'timoxa40', + 'upload_date': '20100404', + 'thumbnail': 'http://frame7.loadup.ru/af/3f/1390466.3.3.jpg', + }, + 'params': { + 'videopassword': 'qwerty', + }, + 'skip': 'Video is not approved by moderator', + }, + # video-password + { + 'url': 'http://smotri.com/video/view/?id=v6984858774#', + 'md5': 'f11e01d13ac676370fc3b95b9bda11b0', + 'info_dict': { + 'id': 'v6984858774', + 'ext': 'mp4', + 'title': 'Дача Солженицина ПАРОЛЬ 223322', + 'uploader': 'psavari1', + 'uploader_id': 'psavari1', + 'upload_date': '20081103', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + 'params': { + 'videopassword': '223322', + }, + }, + # age limit + video-password, not approved by moderator + { + 'url': 'http://smotri.com/video/view/?id=v15408898bcf', + 'md5': '91e909c9f0521adf5ee86fbe073aad70', + 'info_dict': { + 'id': 'v15408898bcf', + 'ext': 'flv', + 'title': 'этот ролик не покажут по ТВ', + 'uploader': 'zzxxx', + 'uploader_id': 'ueggb', + 'upload_date': '20101001', + 'thumbnail': 'http://frame3.loadup.ru/75/75/1540889.1.3.jpg', + 'age_limit': 18, + }, + 'params': { + 'videopassword': '333' + }, + 'skip': 'Video is not approved by moderator', + }, + # age limit + video-password + { + 'url': 
'http://smotri.com/video/view/?id=v7780025814', + 'md5': 'b4599b068422559374a59300c5337d72', + 'info_dict': { + 'id': 'v7780025814', + 'ext': 'mp4', + 'title': 'Sexy Beach (пароль 123)', + 'uploader': 'вАся', + 'uploader_id': 'asya_prosto', + 'upload_date': '20081218', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + }, + 'params': { + 'videopassword': '123' + }, + }, + # swf player + { + 'url': 'http://pics.smotri.com/scrubber_custom8.swf?file=v9188090500', + 'md5': '31099eeb4bc906712c5f40092045108d', + 'info_dict': { + 'id': 'v9188090500', + 'ext': 'mp4', + 'title': 'Shakira - Don\'t Bother', + 'uploader': 'HannahL', + 'uploader_id': 'lisaha95', + 'upload_date': '20090331', + 'thumbnail': 'http://frame8.loadup.ru/44/0b/918809.7.3.jpg', + }, + }, + ] + + @classmethod + def _extract_url(cls, webpage): + mobj = re.search( + r'<embed[^>]src=(["\'])(?P<url>http://pics\.smotri\.com/(?:player|scrubber_custom8)\.swf\?file=v.+?\1)', + webpage) + if mobj is not None: + return mobj.group('url') + + mobj = re.search( + r'''(?x)<div\s+class="video_file">http://smotri\.com/video/download/file/[^<]+</div>\s* + <div\s+class="video_image">[^<]+</div>\s* + <div\s+class="video_id">(?P<id>[^<]+)</div>''', webpage) + if mobj is not None: + return 'http://smotri.com/video/view/?id=%s' % mobj.group('id') + + def _search_meta(self, name, html, display_name=None): + if display_name is None: + display_name = name + return self._html_search_meta(name, html, display_name) + + def _real_extract(self, url): + video_id = self._match_id(url) + + video_form = { + 'ticket': video_id, + 'video_url': '1', + 'frame_url': '1', + 'devid': 'LoadupFlashPlayer', + 'getvideoinfo': '1', + } + + video_password = self._downloader.params.get('videopassword') + if video_password: + video_form['pass'] = hashlib.md5(video_password.encode('utf-8')).hexdigest() + + video = self._download_json( + 'http://smotri.com/video/view/url/bot/', + video_id, 'Downloading video JSON', + data=urlencode_postdata(video_form), + headers={'Content-Type': 'application/x-www-form-urlencoded'}) + + video_url = video.get('_vidURL') or video.get('_vidURL_mp4') + + if not video_url: + if video.get('_moderate_no'): + raise ExtractorError( + 'Video %s has not been approved by moderator' % video_id, expected=True) + + if video.get('error'): + raise ExtractorError('Video %s does not exist' % video_id, expected=True) + + if video.get('_pass_protected') == 1: + msg = ('Invalid video password' if video_password + else 'This video is protected by a password, use the --video-password option') + raise ExtractorError(msg, expected=True) + + title = video['title'] + thumbnail = video.get('_imgURL') + upload_date = unified_strdate(video.get('added')) + uploader = video.get('userNick') + uploader_id = video.get('userLogin') + duration = int_or_none(video.get('duration')) + + # Video JSON does not provide enough meta data + # We will extract some from the video web page instead + webpage_url = 'http://smotri.com/video/view/?id=%s' % video_id + webpage = self._download_webpage(webpage_url, video_id, 'Downloading video page') + + # Warning if video is unavailable + warning = self._html_search_regex( + r'<div[^>]+class="videoUnModer"[^>]*>(.+?)</div>', webpage, + 'warning message', default=None) + if warning is not None: + self._downloader.report_warning( + 'Video %s may not be available; smotri said: %s ' % + (video_id, warning)) + + # Adult content + if 'EroConfirmText">' in webpage: + self.report_age_confirmation() + confirm_string = self._html_search_regex( + 
r'<a[^>]+href="/video/view/\?id=%s&confirm=([^"]+)"' % video_id, + webpage, 'confirm string') + confirm_url = webpage_url + '&confirm=%s' % confirm_string + webpage = self._download_webpage( + confirm_url, video_id, + 'Downloading video page (age confirmed)') + adult_content = True + else: + adult_content = False + + view_count = self._html_search_regex( + r'(?s)Общее количество просмотров.*?<span class="Number">(\d+)</span>', + webpage, 'view count', fatal=False) + + return { + 'id': video_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'upload_date': upload_date, + 'uploader_id': uploader_id, + 'duration': duration, + 'view_count': int_or_none(view_count), + 'age_limit': 18 if adult_content else 0, + } + + +class SmotriCommunityIE(InfoExtractor): + IE_DESC = 'Smotri.com community videos' + IE_NAME = 'smotri:community' + _VALID_URL = r'https?://(?:www\.)?smotri\.com/community/video/(?P<id>[0-9A-Za-z_\'-]+)' + _TEST = { + 'url': 'http://smotri.com/community/video/kommuna', + 'info_dict': { + 'id': 'kommuna', + }, + 'playlist_mincount': 4, + } + + def _real_extract(self, url): + community_id = self._match_id(url) + + rss = self._download_xml( + 'http://smotri.com/export/rss/video/by/community/-/%s/video.xml' % community_id, + community_id, 'Downloading community RSS') + + entries = [ + self.url_result(video_url.text, SmotriIE.ie_key()) + for video_url in rss.findall('./channel/item/link')] + + return self.playlist_result(entries, community_id) + + +class SmotriUserIE(InfoExtractor): + IE_DESC = 'Smotri.com user videos' + IE_NAME = 'smotri:user' + _VALID_URL = r'https?://(?:www\.)?smotri\.com/user/(?P<id>[0-9A-Za-z_\'-]+)' + _TESTS = [{ + 'url': 'http://smotri.com/user/inspector', + 'info_dict': { + 'id': 'inspector', + 'title': 'Inspector', + }, + 'playlist_mincount': 9, + }] + + def _real_extract(self, url): + user_id = self._match_id(url) + + rss = self._download_xml( + 'http://smotri.com/export/rss/user/video/-/%s/video.xml' % user_id, + user_id, 'Downloading user RSS') + + entries = [self.url_result(video_url.text, 'Smotri') + for video_url in rss.findall('./channel/item/link')] + + description_text = xpath_text(rss, './channel/description') or '' + user_nickname = self._search_regex( + '^Видео режиссера (.+)$', description_text, + 'user nickname', fatal=False) + + return self.playlist_result(entries, user_id, user_nickname) + + +class SmotriBroadcastIE(InfoExtractor): + IE_DESC = 'Smotri.com broadcasts' + IE_NAME = 'smotri:broadcast' + _VALID_URL = r'https?://(?:www\.)?(?P<url>smotri\.com/live/(?P<id>[^/]+))/?.*' + _NETRC_MACHINE = 'smotri' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + broadcast_id = mobj.group('id') + + broadcast_url = 'http://' + mobj.group('url') + broadcast_page = self._download_webpage(broadcast_url, broadcast_id, 'Downloading broadcast page') + + if re.search('>Режиссер с логином <br/>"%s"<br/> <span>не существует<' % broadcast_id, broadcast_page) is not None: + raise ExtractorError( + 'Broadcast %s does not exist' % broadcast_id, expected=True) + + # Adult content + if re.search('EroConfirmText">', broadcast_page) is not None: + + (username, password) = self._get_login_info() + if username is None: + self.raise_login_required( + 'Erotic broadcasts allowed only for registered users') + + login_form = { + 'login-hint53': '1', + 'confirm_erotic': '1', + 'login': username, + 'password': password, + } + + request = sanitized_Request( + broadcast_url + '/?no_redirect=1', 
urlencode_postdata(login_form))
+            request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+            broadcast_page = self._download_webpage(
+                request, broadcast_id, 'Logging in and confirming age')
+
+            if '>Неверный логин или пароль<' in broadcast_page:
+                raise ExtractorError(
+                    'Unable to log in: bad username or password', expected=True)
+
+            adult_content = True
+        else:
+            adult_content = False
+
+        ticket = self._html_search_regex(
+            (r'data-user-file=(["\'])(?P<ticket>(?!\1).+)\1',
+             r"window\.broadcast_control\.addFlashVar\('file'\s*,\s*'(?P<ticket>[^']+)'\)"),
+            broadcast_page, 'broadcast ticket', group='ticket')
+
+        broadcast_url = 'http://smotri.com/broadcast/view/url/?ticket=%s' % ticket
+
+        broadcast_password = self._downloader.params.get('videopassword')
+        if broadcast_password:
+            broadcast_url += '&pass=%s' % hashlib.md5(broadcast_password.encode('utf-8')).hexdigest()
+
+        broadcast_json_page = self._download_webpage(
+            broadcast_url, broadcast_id, 'Downloading broadcast JSON')
+
+        try:
+            broadcast_json = json.loads(broadcast_json_page)
+
+            protected_broadcast = broadcast_json['_pass_protected'] == 1
+            if protected_broadcast and not broadcast_password:
+                raise ExtractorError(
+                    'This broadcast is protected by a password, use the --video-password option',
+                    expected=True)
+
+            broadcast_offline = broadcast_json['is_play'] == 0
+            if broadcast_offline:
+                raise ExtractorError('Broadcast %s is offline' % broadcast_id, expected=True)
+
+            rtmp_url = broadcast_json['_server']
+            mobj = re.search(r'^rtmp://[^/]+/(?P<app>.+)/?$', rtmp_url)
+            if not mobj:
+                raise ExtractorError('Unexpected broadcast rtmp URL')
+
+            broadcast_playpath = broadcast_json['_streamName']
+            broadcast_app = '%s/%s' % (mobj.group('app'), broadcast_json['_vidURL'])
+            broadcast_thumbnail = broadcast_json.get('_imgURL')
+            broadcast_title = self._live_title(broadcast_json['title'])
+            broadcast_description = broadcast_json.get('description')
+            broadcaster_nick = broadcast_json.get('nick')
+            broadcaster_login = broadcast_json.get('login')
+            rtmp_conn = 'S:%s' % uuid.uuid4().hex
+        except KeyError:
+            if protected_broadcast:
+                raise ExtractorError('Bad broadcast password', expected=True)
+            raise ExtractorError('Unexpected broadcast JSON')
+
+        return {
+            'id': broadcast_id,
+            'url': rtmp_url,
+            'title': broadcast_title,
+            'thumbnail': broadcast_thumbnail,
+            'description': broadcast_description,
+            'uploader': broadcaster_nick,
+            'uploader_id': broadcaster_login,
+            'age_limit': 18 if adult_content else 0,
+            'ext': 'flv',
+            'play_path': broadcast_playpath,
+            'player_url': 'http://pics.smotri.com/broadcast_play.swf',
+            'app': broadcast_app,
+            'rtmp_live': True,
+            'rtmp_conn': rtmp_conn,
+            'is_live': True,
+        }
diff --git a/youtube_dl/extractor/snotr.py b/youtube_dl/extractor/snotr.py
new file mode 100644
index 000000000..f77354748
--- /dev/null
+++ b/youtube_dl/extractor/snotr.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    parse_filesize,
+    str_to_int,
+)
+
+
+class SnotrIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?snotr\.com/video/(?P<id>\d+)/([\w]+)'
+    _TESTS = [{
+        'url': 'http://www.snotr.com/video/13708/Drone_flying_through_fireworks',
+        'info_dict': {
+            'id': '13708',
+            'ext': 'mp4',
+            'title': 'Drone flying through fireworks!',
+            'duration': 248,
+            'filesize_approx': 40700000,
+            'description': 'A drone flying through Fourth of July Fireworks',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        
+ 'expected_warnings': ['description'], + }, { + 'url': 'http://www.snotr.com/video/530/David_Letteman_-_George_W_Bush_Top_10', + 'info_dict': { + 'id': '530', + 'ext': 'mp4', + 'title': 'David Letteman - George W. Bush Top 10', + 'duration': 126, + 'filesize_approx': 8500000, + 'description': 'The top 10 George W. Bush moments, brought to you by David Letterman!', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + webpage = self._download_webpage(url, video_id) + title = self._og_search_title(webpage) + + description = self._og_search_description(webpage) + info_dict = self._parse_html5_media_entries( + url, webpage, video_id, m3u8_entry_protocol='m3u8_native')[0] + + view_count = str_to_int(self._html_search_regex( + r'<p[^>]*>\s*<strong[^>]*>Views:</strong>\s*<span[^>]*>([\d,\.]+)', + webpage, 'view count', fatal=False)) + + duration = parse_duration(self._html_search_regex( + r'<p[^>]*>\s*<strong[^>]*>Length:</strong>\s*<span[^>]*>([\d:]+)', + webpage, 'duration', fatal=False)) + + filesize_approx = parse_filesize(self._html_search_regex( + r'<p[^>]*>\s*<strong[^>]*>Filesize:</strong>\s*<span[^>]*>([^<]+)', + webpage, 'filesize', fatal=False)) + + info_dict.update({ + 'id': video_id, + 'description': description, + 'title': title, + 'view_count': view_count, + 'duration': duration, + 'filesize_approx': filesize_approx, + }) + + return info_dict diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py new file mode 100644 index 000000000..a62ed84f1 --- /dev/null +++ b/youtube_dl/extractor/sohu.py @@ -0,0 +1,202 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_urllib_parse_urlencode, +) +from ..utils import ( + ExtractorError, + int_or_none, + try_get, +) + + +class SohuIE(InfoExtractor): + _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?' 
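+    # (?(mytv)|n) is a conditional group: the numeric id is prefixed with 'n'
+    # on tv.sohu.com links but not on my.tv.sohu.com links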
+
+    # Sohu videos give different MD5 sums on Travis CI and my machine
+    _TESTS = [{
+        'note': 'This video is available only in Mainland China',
+        'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
+        'info_dict': {
+            'id': '382479172',
+            'ext': 'mp4',
+            'title': 'MV:Far East Movement《The Illest》',
+        },
+        'skip': 'Only available in China',
+    }, {
+        'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+        'info_dict': {
+            'id': '409385080',
+            'ext': 'mp4',
+            'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+        }
+    }, {
+        'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+        'info_dict': {
+            'id': '78693464',
+            'ext': 'mp4',
+            'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+        }
+    }, {
+        'note': 'Multipart video',
+        'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+        'info_dict': {
+            'id': '78910339',
+            'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+        },
+        'playlist': [{
+            'info_dict': {
+                'id': '78910339_part1',
+                'ext': 'mp4',
+                'duration': 294,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }, {
+            'info_dict': {
+                'id': '78910339_part2',
+                'ext': 'mp4',
+                'duration': 300,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }, {
+            'info_dict': {
+                'id': '78910339_part3',
+                'ext': 'mp4',
+                'duration': 150,
+                'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+            }
+        }]
+    }, {
+        'note': 'Video with title containing dash',
+        'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+        'info_dict': {
+            'id': '78932792',
+            'ext': 'mp4',
+            'title': 'youtube-dl testing video',
+        },
+        'params': {
+            'skip_download': True
+        }
+    }]
+
+    def _real_extract(self, url):
+
+        def _fetch_data(vid_id, mytv=False):
+            if mytv:
+                base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
+            else:
+                base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+
+            return self._download_json(
+                base_data_url + vid_id, video_id,
+                'Downloading JSON data for %s' % vid_id,
+                headers=self.geo_verification_headers())
+
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        mytv = mobj.group('mytv') is not None
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
+
+        vid = self._html_search_regex(
+            r'var vid ?= ?["\'](\d+)["\']',
+            webpage, 'video path')
+        vid_data = _fetch_data(vid, mytv)
+        if vid_data['play'] != 1:
+            if vid_data.get('status') == 12:
+                raise ExtractorError(
+                    '%s said: There\'s something wrong in the video.' % self.IE_NAME,
+                    expected=True)
+            else:
+                self.raise_geo_restricted(
+                    '%s said: The video is only licensed to users in Mainland China.' 
% self.IE_NAME) + + formats_json = {} + for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'): + vid_id = vid_data['data'].get('%sVid' % format_id) + if not vid_id: + continue + vid_id = compat_str(vid_id) + formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv) + + part_count = vid_data['data']['totalBlocks'] + + playlist = [] + for i in range(part_count): + formats = [] + for format_id, format_data in formats_json.items(): + allot = format_data['allot'] + + data = format_data['data'] + clips_url = data['clipsURL'] + su = data['su'] + + video_url = 'newflv.sohu.ccgslb.net' + cdnId = None + retries = 0 + + while 'newflv.sohu.ccgslb.net' in video_url: + params = { + 'prot': 9, + 'file': clips_url[i], + 'new': su[i], + 'prod': 'flash', + 'rb': 1, + } + + if cdnId is not None: + params['idc'] = cdnId + + download_note = 'Downloading %s video URL part %d of %d' % ( + format_id, i + 1, part_count) + + if retries > 0: + download_note += ' (retry #%d)' % retries + part_info = self._parse_json(self._download_webpage( + 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)), + video_id, download_note), video_id) + + video_url = part_info['url'] + cdnId = part_info.get('nid') + + retries += 1 + if retries > 5: + raise ExtractorError('Failed to get video URL') + + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'filesize': int_or_none( + try_get(data, lambda x: x['clipsBytes'][i])), + 'width': int_or_none(data.get('width')), + 'height': int_or_none(data.get('height')), + 'fps': int_or_none(data.get('fps')), + }) + self._sort_formats(formats) + + playlist.append({ + 'id': '%s_part%d' % (video_id, i + 1), + 'title': title, + 'duration': vid_data['data']['clipsDuration'][i], + 'formats': formats, + }) + + if len(playlist) == 1: + info = playlist[0] + info['id'] = video_id + else: + info = { + '_type': 'multi_video', + 'entries': playlist, + 'id': video_id, + 'title': title, + } + + return info diff --git a/youtube_dl/extractor/sonyliv.py b/youtube_dl/extractor/sonyliv.py new file mode 100644 index 000000000..58a8c0d4d --- /dev/null +++ b/youtube_dl/extractor/sonyliv.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import smuggle_url + + +class SonyLIVIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/details/[^/]+/(?P<id>\d+)' + _TESTS = [{ + 'url': "http://www.sonyliv.com/details/episodes/5024612095001/Ep.-1---Achaari-Cheese-Toast---Bachelor's-Delight", + 'info_dict': { + 'title': "Ep. 
1 - Achaari Cheese Toast - Bachelor's Delight",
+            'id': 'ref:5024612095001',
+            'ext': 'mp4',
+            'upload_date': '20170923',
+            'description': 'md5:7f28509a148d5be9d0782b4d5106410d',
+            'uploader_id': '5182475815001',
+            'timestamp': 1506200547,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': ['BrightcoveNew'],
+    }, {
+        'url': 'http://www.sonyliv.com/details/full%20movie/4951168986001/Sei-Raat-(Bangla)',
+        'only_matching': True,
+    }]
+
+    # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/4338955589001/default_default/index.html?videoId=%s'
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5182475815001/default_default/index.html?videoId=ref:%s'
+
+    def _real_extract(self, url):
+        brightcove_id = self._match_id(url)
+        return self.url_result(
+            smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {
+                'geo_countries': ['IN'],
+                'referrer': url,
+            }),
+            'BrightcoveNew', brightcove_id)
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
new file mode 100644
index 000000000..3a8626e02
--- /dev/null
+++ b/youtube_dl/extractor/soundcloud.py
@@ -0,0 +1,795 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import (
+    InfoExtractor,
+    SearchInfoExtractor
+)
+from ..compat import (
+    compat_str,
+    compat_urlparse,
+    compat_urllib_parse_urlencode,
+)
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+    KNOWN_EXTENSIONS,
+    merge_dicts,
+    mimetype2ext,
+    str_or_none,
+    try_get,
+    unified_timestamp,
+    update_url_query,
+    url_or_none,
+)
+
+
+class SoundcloudIE(InfoExtractor):
+    """Information extractor for soundcloud.com
+       To access the media, the uid of the song and a stream token
+       must be extracted from the page source and the script must make
+       a request to media.soundcloud.com/crossdomain.xml. Then
+       the media can be grabbed by requesting from a URL composed
+       of the stream token and uid
+    """
+
+    _VALID_URL = r'''(?x)^(?:https?://)?
+                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
+                            (?!stations/track)
+                            (?P<uploader>[\w\d-]+)/
+                            (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
+                            (?P<title>[\w\d-]+)/?
+                            (?P<token>[^?]+?)?(?:[?].*)?$)
+                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
+                          (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
+                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
+                    )
+                    '''
+    IE_NAME = 'soundcloud'
+    _TESTS = [
+        {
+            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+            'info_dict': {
+                'id': '62986583',
+                'ext': 'mp3',
+                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
+                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
+                'uploader': 'E.T. 
ExTerrestrial Music', + 'timestamp': 1349920598, + 'upload_date': '20121011', + 'duration': 143.216, + 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + } + }, + # not streamable song + { + 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep', + 'info_dict': { + 'id': '47127627', + 'ext': 'mp3', + 'title': 'Goldrushed', + 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com', + 'uploader': 'The Royal Concept', + 'timestamp': 1337635207, + 'upload_date': '20120521', + 'duration': 30, + 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + }, + 'params': { + # rtmp + 'skip_download': True, + }, + }, + # private link + { + 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp', + 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', + 'info_dict': { + 'id': '123998367', + 'ext': 'mp3', + 'title': 'Youtube - Dl Test Video \'\' Ä↭', + 'description': 'test chars: \"\'/\\ä↭', + 'uploader': 'jaimeMF', + 'timestamp': 1386604920, + 'upload_date': '20131209', + 'duration': 9.927, + 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + }, + }, + # private link (alt format) + { + 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp', + 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604', + 'info_dict': { + 'id': '123998367', + 'ext': 'mp3', + 'title': 'Youtube - Dl Test Video \'\' Ä↭', + 'description': 'test chars: \"\'/\\ä↭', + 'uploader': 'jaimeMF', + 'timestamp': 1386604920, + 'upload_date': '20131209', + 'duration': 9.927, + 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + }, + }, + # downloadable song + { + 'url': 'https://soundcloud.com/oddsamples/bus-brakes', + 'md5': '7624f2351f8a3b2e7cd51522496e7631', + 'info_dict': { + 'id': '128590877', + 'ext': 'mp3', + 'title': 'Bus Brakes', + 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66', + 'uploader': 'oddsamples', + 'timestamp': 1389232924, + 'upload_date': '20140109', + 'duration': 17.346, + 'license': 'cc-by-sa', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + }, + }, + # private link, downloadable format + { + 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd', + 'md5': '64a60b16e617d41d0bef032b7f55441e', + 'info_dict': { + 'id': '340344461', + 'ext': 'wav', + 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]', + 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366', + 'uploader': 'Ori Uplift Music', + 'timestamp': 1504206263, + 'upload_date': '20170831', + 'duration': 7449.096, + 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + }, + }, + # no album art, use avatar pic for thumbnail + { + 'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real', + 'md5': '59c7872bc44e5d99b7211891664760c2', + 'info_dict': { + 'id': '309699954', + 'ext': 'mp3', + 'title': 'Sideways (Prod. 
Mad Real)', + 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', + 'uploader': 'garyvee', + 'timestamp': 1488152409, + 'upload_date': '20170226', + 'duration': 207.012, + 'thumbnail': r're:https?://.*\.jpg', + 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, + # not avaialble via api.soundcloud.com/i1/tracks/id/streams + { + 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer', + 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7', + 'info_dict': { + 'id': '583011102', + 'ext': 'mp3', + 'title': 'Mezzo Valzer', + 'description': 'md5:4138d582f81866a530317bae316e8b61', + 'uploader': 'Giovanni Sarani', + 'timestamp': 1551394171, + 'upload_date': '20190228', + 'duration': 180.157, + 'thumbnail': r're:https?://.*\.jpg', + 'license': 'all-rights-reserved', + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'repost_count': int, + }, + 'expected_warnings': ['Unable to download JSON metadata'], + } + ] + + _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI' + + @staticmethod + def _extract_urls(webpage): + return [m.group('url') for m in re.finditer( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1', + webpage)] + + @classmethod + def _resolv_url(cls, url): + return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID + + def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None): + track_id = compat_str(info['id']) + title = info['title'] + name = full_title or track_id + if quiet: + self.report_extraction(name) + thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url') + if isinstance(thumbnail, compat_str): + thumbnail = thumbnail.replace('-large', '-t500x500') + username = try_get(info, lambda x: x['user']['username'], compat_str) + + def extract_count(key): + return int_or_none(info.get('%s_count' % key)) + + like_count = extract_count('favoritings') + if like_count is None: + like_count = extract_count('likes') + + result = { + 'id': track_id, + 'uploader': username, + 'timestamp': unified_timestamp(info.get('created_at')), + 'title': title, + 'description': info.get('description'), + 'thumbnail': thumbnail, + 'duration': float_or_none(info.get('duration'), 1000), + 'webpage_url': info.get('permalink_url'), + 'license': info.get('license'), + 'view_count': extract_count('playback'), + 'like_count': like_count, + 'comment_count': extract_count('comment'), + 'repost_count': extract_count('reposts'), + 'genre': info.get('genre'), + } + + format_urls = set() + formats = [] + query = {'client_id': self._CLIENT_ID} + if secret_token is not None: + query['secret_token'] = secret_token + if info.get('downloadable', False): + # We can build a direct link to the song + format_url = update_url_query( + 'https://api.soundcloud.com/tracks/%s/download' % track_id, query) + format_urls.add(format_url) + formats.append({ + 'format_id': 'download', + 'ext': info.get('original_format', 'mp3'), + 'url': format_url, + 'vcodec': 'none', + 'preference': 10, + }) + + # Old API, does not work for some tracks (e.g. 
+ # https://soundcloud.com/giovannisarani/mezzo-valzer) + format_dict = self._download_json( + 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id, + track_id, 'Downloading track url', query=query, fatal=False) + + if format_dict: + for key, stream_url in format_dict.items(): + if stream_url in format_urls: + continue + format_urls.add(stream_url) + ext, abr = 'mp3', None + mobj = re.search(r'_([^_]+)_(\d+)_url', key) + if mobj: + ext, abr = mobj.groups() + abr = int(abr) + if key.startswith('http'): + stream_formats = [{ + 'format_id': key, + 'ext': ext, + 'url': stream_url, + }] + elif key.startswith('rtmp'): + # The url doesn't have an rtmp app, we have to extract the playpath + url, path = stream_url.split('mp3:', 1) + stream_formats = [{ + 'format_id': key, + 'url': url, + 'play_path': 'mp3:' + path, + 'ext': 'flv', + }] + elif key.startswith('hls'): + stream_formats = self._extract_m3u8_formats( + stream_url, track_id, ext, entry_protocol='m3u8_native', + m3u8_id=key, fatal=False) + else: + continue + + if abr: + for f in stream_formats: + f['abr'] = abr + + formats.extend(stream_formats) + + # New API + transcodings = try_get( + info, lambda x: x['media']['transcodings'], list) or [] + for t in transcodings: + if not isinstance(t, dict): + continue + format_url = url_or_none(t.get('url')) + if not format_url: + continue + stream = self._download_json( + update_url_query(format_url, query), track_id, fatal=False) + if not isinstance(stream, dict): + continue + stream_url = url_or_none(stream.get('url')) + if not stream_url: + continue + if stream_url in format_urls: + continue + format_urls.add(stream_url) + protocol = try_get(t, lambda x: x['format']['protocol'], compat_str) + if protocol != 'hls' and '/hls' in format_url: + protocol = 'hls' + ext = None + preset = str_or_none(t.get('preset')) + if preset: + ext = preset.split('_')[0] + if ext not in KNOWN_EXTENSIONS: + mimetype = try_get( + t, lambda x: x['format']['mime_type'], compat_str) + ext = mimetype2ext(mimetype) or 'mp3' + format_id_list = [] + if protocol: + format_id_list.append(protocol) + format_id_list.append(ext) + format_id = '_'.join(format_id_list) + formats.append({ + 'url': stream_url, + 'format_id': format_id, + 'ext': ext, + 'protocol': 'm3u8_native' if protocol == 'hls' else 'http', + }) + + if not formats: + # We fallback to the stream_url in the original info, this + # cannot be always used, sometimes it can give an HTTP 404 error + formats.append({ + 'format_id': 'fallback', + 'url': update_url_query(info['stream_url'], query), + 'ext': 'mp3', + }) + self._check_formats(formats, track_id) + + for f in formats: + f['vcodec'] = 'none' + + self._sort_formats(formats) + result['formats'] = formats + + return result + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) + if mobj is None: + raise ExtractorError('Invalid URL: %s' % url) + + track_id = mobj.group('track_id') + new_info = {} + + if track_id is not None: + info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID + full_title = track_id + token = mobj.group('secret_token') + if token: + info_json_url += '&secret_token=' + token + elif mobj.group('player'): + query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + real_url = query['url'][0] + # If the token is in the query of the original url we have to + # manually add it + if 'secret_token' in query: + real_url += '?secret_token=' + query['secret_token'][0] + return self.url_result(real_url) 
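+ + # Canonical permalinks fall through to the else branch: the page's + # hydration array (var c = [...]) is scraped for the module with id 67, + # whose first data entry may carry track metadata (e.g. media + # transcodings) that the legacy resolve API lacks; the resolve JSON and + # the scraped data are then merged in _extract_info_dict.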
+ else: + # extract uploader (which is in the url) + uploader = mobj.group('uploader') + # extract simple title (uploader + slug of song title) + slug_title = mobj.group('title') + token = mobj.group('token') + full_title = resolve_title = '%s/%s' % (uploader, slug_title) + if token: + resolve_title += '/%s' % token + + webpage = self._download_webpage(url, full_title, fatal=False) + if webpage: + entries = self._parse_json( + self._search_regex( + r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage, + 'data', default='[]'), full_title, fatal=False) + if entries: + for e in entries: + if not isinstance(e, dict): + continue + if e.get('id') != 67: + continue + data = try_get(e, lambda x: x['data'][0], dict) + if data: + new_info = data + break + info_json_url = self._resolv_url( + 'https://soundcloud.com/%s' % resolve_title) + + # Contains some additional info missing from new_info + info = self._download_json( + info_json_url, full_title, 'Downloading info JSON') + + return self._extract_info_dict( + merge_dicts(info, new_info), full_title, secret_token=token) + + +class SoundcloudPlaylistBaseIE(SoundcloudIE): + @staticmethod + def _extract_id(e): + return compat_str(e['id']) if e.get('id') else None + + def _extract_track_entries(self, tracks): + return [ + self.url_result( + track['permalink_url'], SoundcloudIE.ie_key(), + video_id=self._extract_id(track)) + for track in tracks if track.get('permalink_url')] + + +class SoundcloudSetIE(SoundcloudPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?' + IE_NAME = 'soundcloud:set' + _TESTS = [{ + 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep', + 'info_dict': { + 'id': '2284613', + 'title': 'The Royal Concept EP', + }, + 'playlist_mincount': 5, + }, { + 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + + # extract uploader (which is in the url) + uploader = mobj.group('uploader') + # extract simple title (uploader + slug of song title) + slug_title = mobj.group('slug_title') + full_title = '%s/sets/%s' % (uploader, slug_title) + url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title) + + token = mobj.group('token') + if token: + full_title += '/' + token + url += '/' + token + + resolv_url = self._resolv_url(url) + info = self._download_json(resolv_url, full_title) + + if 'errors' in info: + msgs = (compat_str(err['error_message']) for err in info['errors']) + raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs)) + + entries = self._extract_track_entries(info['tracks']) + + return { + '_type': 'playlist', + 'entries': entries, + 'id': '%s' % info['id'], + 'title': info['title'], + } + + +class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE): + _API_V2_BASE = 'https://api-v2.soundcloud.com' + + def _extract_playlist(self, base_url, playlist_id, playlist_title): + COMMON_QUERY = { + 'limit': 50, + 'client_id': self._CLIENT_ID, + 'linked_partitioning': '1', + } + + query = COMMON_QUERY.copy() + query['offset'] = 0 + + next_href = base_url + '?' 
+ compat_urllib_parse_urlencode(query) + + entries = [] + for i in itertools.count(): + response = self._download_json( + next_href, playlist_id, 'Downloading track page %s' % (i + 1)) + + collection = response['collection'] + + if not isinstance(collection, list): + collection = [] + + # Empty collection may be returned, in this case we proceed + # straight to next_href + + def resolve_entry(candidates): + for cand in candidates: + if not isinstance(cand, dict): + continue + permalink_url = url_or_none(cand.get('permalink_url')) + if not permalink_url: + continue + return self.url_result( + permalink_url, + ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None, + video_id=self._extract_id(cand), + video_title=cand.get('title')) + + for e in collection: + entry = resolve_entry((e, e.get('track'), e.get('playlist'))) + if entry: + entries.append(entry) + + next_href = response.get('next_href') + if not next_href: + break + + parsed_next_href = compat_urlparse.urlparse(response['next_href']) + qs = compat_urlparse.parse_qs(parsed_next_href.query) + qs.update(COMMON_QUERY) + next_href = compat_urlparse.urlunparse( + parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True))) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': playlist_title, + 'entries': entries, + } + + +class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE): + _VALID_URL = r'''(?x) + https?:// + (?:(?:www|m)\.)?soundcloud\.com/ + (?P<user>[^/]+) + (?:/ + (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight) + )? + /?(?:[?#].*)?$ + ''' + IE_NAME = 'soundcloud:user' + _TESTS = [{ + 'url': 'https://soundcloud.com/soft-cell-official', + 'info_dict': { + 'id': '207965082', + 'title': 'Soft Cell (All)', + }, + 'playlist_mincount': 28, + }, { + 'url': 'https://soundcloud.com/soft-cell-official/tracks', + 'info_dict': { + 'id': '207965082', + 'title': 'Soft Cell (Tracks)', + }, + 'playlist_mincount': 27, + }, { + 'url': 'https://soundcloud.com/soft-cell-official/albums', + 'info_dict': { + 'id': '207965082', + 'title': 'Soft Cell (Albums)', + }, + 'playlist_mincount': 1, + }, { + 'url': 'https://soundcloud.com/jcv246/sets', + 'info_dict': { + 'id': '12982173', + 'title': 'Jordi / cv (Playlists)', + }, + 'playlist_mincount': 2, + }, { + 'url': 'https://soundcloud.com/jcv246/reposts', + 'info_dict': { + 'id': '12982173', + 'title': 'Jordi / cv (Reposts)', + }, + 'playlist_mincount': 6, + }, { + 'url': 'https://soundcloud.com/clalberg/likes', + 'info_dict': { + 'id': '11817582', + 'title': 'clalberg (Likes)', + }, + 'playlist_mincount': 5, + }, { + 'url': 'https://soundcloud.com/grynpyret/spotlight', + 'info_dict': { + 'id': '7098329', + 'title': 'Grynpyret (Spotlight)', + }, + 'playlist_mincount': 1, + }] + + _BASE_URL_MAP = { + 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE, + } + + _TITLE_MAP = { + 'all': 'All', + 'tracks': 'Tracks', + 'albums': 'Albums', + 'sets': 'Playlists', + 'reposts': 'Reposts', + 'likes': 'Likes', + 'spotlight': 'Spotlight', + } + + def 
_real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + uploader = mobj.group('user') + + url = 'https://soundcloud.com/%s/' % uploader + resolv_url = self._resolv_url(url) + user = self._download_json( + resolv_url, uploader, 'Downloading user info') + + resource = mobj.group('rsrc') or 'all' + + return self._extract_playlist( + self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']), + '%s (%s)' % (user['username'], self._TITLE_MAP[resource])) + + +class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)' + IE_NAME = 'soundcloud:trackstation' + _TESTS = [{ + 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text', + 'info_dict': { + 'id': '286017854', + 'title': 'Track station: your-text', + }, + 'playlist_mincount': 47, + }] + + def _real_extract(self, url): + track_name = self._match_id(url) + + webpage = self._download_webpage(url, track_name) + + track_id = self._search_regex( + r'soundcloud:track-stations:(\d+)', webpage, 'track id') + + return self._extract_playlist( + '%s/stations/soundcloud:track-stations:%s/tracks' + % (self._API_V2_BASE, track_id), + track_id, 'Track station: %s' % track_name) + + +class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): + _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' + IE_NAME = 'soundcloud:playlist' + _TESTS = [{ + 'url': 'https://api.soundcloud.com/playlists/4110309', + 'info_dict': { + 'id': '4110309', + 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]', + 'description': 're:.*?TILT Brass - Bowery Poetry Club', + }, + 'playlist_count': 6, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + playlist_id = mobj.group('id') + base_url = '%s//api.soundcloud.com/playlists/%s.json?' 
% (self.http_scheme(), playlist_id) + + data_dict = { + 'client_id': self._CLIENT_ID, + } + token = mobj.group('token') + + if token: + data_dict['secret_token'] = token + + data = compat_urllib_parse_urlencode(data_dict) + data = self._download_json( + base_url + data, playlist_id, 'Downloading playlist') + + entries = self._extract_track_entries(data['tracks']) + + return { + '_type': 'playlist', + 'id': playlist_id, + 'title': data.get('title'), + 'description': data.get('description'), + 'entries': entries, + } + + +class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): + IE_NAME = 'soundcloud:search' + IE_DESC = 'Soundcloud search' + _MAX_RESULTS = float('inf') + _TESTS = [{ + 'url': 'scsearch15:post-avant jazzcore', + 'info_dict': { + 'title': 'post-avant jazzcore', + }, + 'playlist_count': 15, + }] + + _SEARCH_KEY = 'scsearch' + _MAX_RESULTS_PER_PAGE = 200 + _DEFAULT_RESULTS_PER_PAGE = 50 + _API_V2_BASE = 'https://api-v2.soundcloud.com' + + def _get_collection(self, endpoint, collection_id, **query): + limit = min( + query.get('limit', self._DEFAULT_RESULTS_PER_PAGE), + self._MAX_RESULTS_PER_PAGE) + query['limit'] = limit + query['client_id'] = self._CLIENT_ID + query['linked_partitioning'] = '1' + query['offset'] = 0 + data = compat_urllib_parse_urlencode(query) + next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data) + + collected_results = 0 + + for i in itertools.count(1): + response = self._download_json( + next_url, collection_id, 'Downloading page {0}'.format(i), + 'Unable to download API page') + + collection = response.get('collection', []) + if not collection: + break + + collection = list(filter(bool, collection)) + collected_results += len(collection) + + for item in collection: + yield self.url_result(item['uri'], SoundcloudIE.ie_key()) + + if not collection or collected_results >= limit: + break + + next_url = response.get('next_href') + if not next_url: + break + + def _get_n_results(self, query, n): + tracks = self._get_collection('/search/tracks', query, limit=n, q=query) + return self.playlist_result(tracks, playlist_title=query) diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dl/extractor/soundgasm.py new file mode 100644 index 000000000..3d78a9d76 --- /dev/null +++ b/youtube_dl/extractor/soundgasm.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class SoundgasmIE(InfoExtractor): + IE_NAME = 'soundgasm' + _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)' + _TEST = { + 'url': 'http://soundgasm.net/u/ytdl/Piano-sample', + 'md5': '010082a2c802c5275bb00030743e75ad', + 'info_dict': { + 'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9', + 'ext': 'm4a', + 'title': 'Piano sample', + 'description': 'Royalty Free Sample Music', + 'uploader': 'ytdl', + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, display_id) + + audio_url = self._html_search_regex( + r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, + 'audio URL', group='url') + + title = self._search_regex( + r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)', + webpage, 'title', default=display_id) + + description = self._html_search_regex( + (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>', + r'(?s)<li>Description:\s(.*?)<\/li>'), + webpage, 'description', fatal=False) + + audio_id = self._search_regex( + r'/([^/]+)\.m4a', 
audio_url, 'audio id', default=display_id) + + return { + 'id': audio_id, + 'display_id': display_id, + 'url': audio_url, + 'vcodec': 'none', + 'title': title, + 'description': description, + 'uploader': mobj.group('user'), + } + + +class SoundgasmProfileIE(InfoExtractor): + IE_NAME = 'soundgasm:profile' + _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$' + _TEST = { + 'url': 'http://soundgasm.net/u/ytdl', + 'info_dict': { + 'id': 'ytdl', + }, + 'playlist_count': 1, + } + + def _real_extract(self, url): + profile_id = self._match_id(url) + + webpage = self._download_webpage(url, profile_id) + + entries = [ + self.url_result(audio_url, 'Soundgasm') + for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)] + + return self.playlist_result(entries, profile_id) diff --git a/youtube_dl/extractor/southpark.py b/youtube_dl/extractor/southpark.py new file mode 100644 index 000000000..da75a43a7 --- /dev/null +++ b/youtube_dl/extractor/southpark.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor + + +class SouthParkIE(MTVServicesInfoExtractor): + IE_NAME = 'southpark.cc.com' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' + + _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss' + + _TESTS = [{ + 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', + 'info_dict': { + 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', + 'ext': 'mp4', + 'title': 'South Park|Bat Daded', + 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', + 'timestamp': 1112760000, + 'upload_date': '20050406', + }, + }, { + 'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1', + 'only_matching': True, + }] + + +class SouthParkEsIE(SouthParkIE): + IE_NAME = 'southpark.cc.com:español' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' + _LANG = 'es' + + _TESTS = [{ + 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', + 'info_dict': { + 'title': 'Cartman Consigue Una Sonda Anal', + 'description': 'Cartman Consigue Una Sonda Anal', + }, + 'playlist_count': 4, + 'skip': 'Geo-restricted', + }] + + +class SouthParkDeIE(SouthParkIE): + IE_NAME = 'southpark.de' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.de/(?:clips|alle-episoden|collections)/(?P<id>.+?)(\?|#|$))' + _FEED_URL = 'http://www.southpark.de/feeds/video-player/mrss/' + + _TESTS = [{ + 'url': 'http://www.southpark.de/clips/uygssh/the-government-wont-respect-my-privacy#tab=featured', + 'info_dict': { + 'id': '85487c96-b3b9-4e39-9127-ad88583d9bf2', + 'ext': 'mp4', + 'title': 'South Park|The Government Won\'t Respect My Privacy', + 'description': 'Cartman explains the benefits of "Shitter" to Stan, Kyle and Craig.', + 'timestamp': 1380160800, + 'upload_date': '20130926', + }, + }, { + # non-ASCII characters in initial URL + 'url': 'http://www.southpark.de/alle-episoden/s18e09-hashtag-aufwärmen', + 'info_dict': { + 'title': 'Hashtag „Aufwärmen“', + 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. 
Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.', + }, + 'playlist_count': 3, + }, { + # non-ASCII characters in redirect URL + 'url': 'http://www.southpark.de/alle-episoden/s18e09', + 'info_dict': { + 'title': 'Hashtag „Aufwärmen“', + 'description': 'Kyle will mit seinem kleinen Bruder Ike Videospiele spielen. Als der nicht mehr mit ihm spielen will, hat Kyle Angst, dass er die Kids von heute nicht mehr versteht.', + }, + 'playlist_count': 3, + }, { + 'url': 'http://www.southpark.de/collections/2476/superhero-showdown/1', + 'only_matching': True, + }] + + +class SouthParkNlIE(SouthParkIE): + IE_NAME = 'southpark.nl' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.nl/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' + _FEED_URL = 'http://www.southpark.nl/feeds/video-player/mrss/' + + _TESTS = [{ + 'url': 'http://www.southpark.nl/full-episodes/s18e06-freemium-isnt-free', + 'info_dict': { + 'title': 'Freemium Isn\'t Free', + 'description': 'Stan is addicted to the new Terrance and Phillip mobile game.', + }, + 'playlist_mincount': 3, + }] + + +class SouthParkDkIE(SouthParkIE): + IE_NAME = 'southparkstudios.dk' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southparkstudios\.(?:dk|nu)/(?:clips|full-episodes|collections)/(?P<id>.+?)(\?|#|$))' + _FEED_URL = 'http://www.southparkstudios.dk/feeds/video-player/mrss/' + + _TESTS = [{ + 'url': 'http://www.southparkstudios.dk/full-episodes/s18e07-grounded-vindaloop', + 'info_dict': { + 'title': 'Grounded Vindaloop', + 'description': 'Butters is convinced he\'s living in a virtual reality.', + }, + 'playlist_mincount': 3, + }, { + 'url': 'http://www.southparkstudios.dk/collections/2476/superhero-showdown/1', + 'only_matching': True, + }, { + 'url': 'http://www.southparkstudios.nu/collections/2476/superhero-showdown/1', + 'only_matching': True, + }] diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py new file mode 100644 index 000000000..f11d728ca --- /dev/null +++ b/youtube_dl/extractor/spankbang.py @@ -0,0 +1,171 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + orderedSet, + parse_duration, + parse_resolution, + str_to_int, + url_or_none, + urlencode_postdata, +) + + +class SpankBangIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b' + _TESTS = [{ + 'url': 'http://spankbang.com/3vvn/video/fantasy+solo', + 'md5': '1cc433e1d6aa14bc376535b8679302f7', + 'info_dict': { + 'id': '3vvn', + 'ext': 'mp4', + 'title': 'fantasy solo', + 'description': 'dillion harper masturbates on a bed', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'silly2587', + 'age_limit': 18, + } + }, { + # 480p only + 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang', + 'only_matching': True, + }, { + # no uploader + 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2', + 'only_matching': True, + }, { + # mobile page + 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name', + 'only_matching': True, + }, { + # 4k + 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k', + 'only_matching': True, + }, { + 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/', + 'only_matching': True, + }, { + 'url': 'https://m.spankbang.com/3vvn/play', + 'only_matching': True, + }, { + 'url': 'https://spankbang.com/2y3td/embed/', + 'only_matching': True, + }] + + def _real_extract(self, url): + 
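# The /embed page lacks most metadata, so it is rewritten to the full + # /video page before fetching; the country=US cookie is sent, presumably + # to pin one regional variant of the markup so the regexes below behave + # consistently. +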
video_id = self._match_id(url) + webpage = self._download_webpage( + url.replace('/%s/embed' % video_id, '/%s/video' % video_id), + video_id, headers={'Cookie': 'country=US'}) + + if re.search(r'<[^>]+\bid=["\']video_removed', webpage): + raise ExtractorError( + 'Video %s is not available' % video_id, expected=True) + + formats = [] + + def extract_format(format_id, format_url): + f_url = url_or_none(format_url) + if not f_url: + return + f = parse_resolution(format_id) + f.update({ + 'url': f_url, + 'format_id': format_id, + }) + formats.append(f) + + STREAM_URL_PREFIX = 'stream_url_' + + for mobj in re.finditer( + r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2' + % STREAM_URL_PREFIX, webpage): + # group() returns an (id, url) tuple here, which must be unpacked + extract_format(*mobj.group('id', 'url')) + + if not formats: + stream_key = self._search_regex( + r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + webpage, 'stream key', group='value') + + sb_csrf_session = self._get_cookies( + 'https://spankbang.com')['sb_csrf_session'].value + + stream = self._download_json( + 'https://spankbang.com/api/videos/stream', video_id, + 'Downloading stream JSON', data=urlencode_postdata({ + 'id': stream_key, + 'data': 0, + 'sb_csrf_session': sb_csrf_session, + }), headers={ + 'Referer': url, + 'X-CSRFToken': sb_csrf_session, + }) + + for format_id, format_url in stream.items(): + if format_id.startswith(STREAM_URL_PREFIX): + extract_format( + format_id[len(STREAM_URL_PREFIX):], format_url) + + self._sort_formats(formats) + + title = self._html_search_regex( + r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title') + description = self._search_regex( + r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)', + webpage, 'description', fatal=False) + thumbnail = self._og_search_thumbnail(webpage) + uploader = self._search_regex( + r'class="user"[^>]*><img[^>]+>([^<]+)', + webpage, 'uploader', default=None) + duration = parse_duration(self._search_regex( + r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)', + webpage, 'duration', fatal=False)) + view_count = str_to_int(self._search_regex( + r'([\d,.]+)\s+plays', webpage, 'view count', fatal=False)) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'duration': duration, + 'view_count': view_count, + 'formats': formats, + 'age_limit': age_limit, + } + + +class SpankBangPlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+' + _TEST = { + 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties', + 'info_dict': { + 'id': 'ug0k', + 'title': 'Big Ass Titties', + }, + 'playlist_mincount': 50, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage( + url, playlist_id, headers={'Cookie': 'country=US; mobile=on'}) + + entries = [self.url_result( + 'https://spankbang.com/%s/video' % video_id, + ie=SpankBangIE.ie_key(), video_id=video_id) + for video_id in orderedSet(re.findall( + r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))] + + title = self._html_search_regex( + r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title', + fatal=False) + + return self.playlist_result(entries, playlist_id, title) diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py new file mode 100644 index 000000000..44d8fa52f --- /dev/null +++ b/youtube_dl/extractor/spankwire.py @@ -0,0 +1,127 @@ +from __future__ import unicode_literals + +import re + +from .common
import InfoExtractor +from ..compat import ( + compat_urllib_parse_unquote, + compat_urllib_parse_urlparse, +) +from ..utils import ( + sanitized_Request, + str_to_int, + unified_strdate, +) +from ..aes import aes_decrypt_text + + +class SpankwireIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)' + _TESTS = [{ + # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4 + 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/', + 'md5': '8bbfde12b101204b39e4b9fe7eb67095', + 'info_dict': { + 'id': '103545', + 'ext': 'mp4', + 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch', + 'description': 'Crazy Bitch X rated music video.', + 'uploader': 'oreusz', + 'uploader_id': '124697', + 'upload_date': '20070507', + 'age_limit': 18, + } + }, { + # download URL pattern: */mp4_<format_id>_<video_id>.mp4 + 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/', + 'md5': '09b3c20833308b736ae8902db2f8d7e6', + 'info_dict': { + 'id': '1921551', + 'ext': 'mp4', + 'title': 'Titcums Compiloation I', + 'description': 'cum on tits', + 'uploader': 'dannyh78999', + 'uploader_id': '3056053', + 'upload_date': '20150822', + 'age_limit': 18, + }, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + req = sanitized_Request('http://www.' + mobj.group('url')) + req.add_header('Cookie', 'age_verified=1') + webpage = self._download_webpage(req, video_id) + + title = self._html_search_regex( + r'<h1>([^<]+)', webpage, 'title') + description = self._html_search_regex( + r'(?s)<div\s+id="descriptionContent">(.+?)</div>', + webpage, 'description', fatal=False) + thumbnail = self._html_search_regex( + r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']', + webpage, 'thumbnail', fatal=False) + + uploader = self._html_search_regex( + r'by:\s*<a [^>]*>(.+?)</a>', + webpage, 'uploader', fatal=False) + uploader_id = self._html_search_regex( + r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"', + webpage, 'uploader id', fatal=False) + upload_date = unified_strdate(self._html_search_regex( + r'</a> on (.+?) 
at \d+:\d+', + webpage, 'upload date', fatal=False)) + + view_count = str_to_int(self._html_search_regex( + r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>', + webpage, 'view count', fatal=False)) + comment_count = str_to_int(self._html_search_regex( + r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>', + webpage, 'comment count', fatal=False)) + + videos = re.findall( + r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage) + heights = [int(video[0]) for video in videos] + video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos])) + # download URLs are AES-encrypted when flashvars.encrypted is set; + # the password is the video title (with '+' standing in for spaces) + if re.search(r'flashvars\.encrypted = "true"', webpage): + password = self._search_regex( + r'flashvars\.video_title = "([^"]+)', + webpage, 'password').replace('+', ' ') + video_urls = list(map( + lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), + video_urls)) + + formats = [] + for height, video_url in zip(heights, video_urls): + path = compat_urllib_parse_urlparse(video_url).path + m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path) + if m: + tbr = int(m.group('tbr')) + height = int(m.group('height')) + else: + tbr = None + formats.append({ + 'url': video_url, + 'format_id': '%dp' % height, + 'height': height, + 'tbr': tbr, + }) + self._sort_formats(formats) + + age_limit = self._rta_search(webpage) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'upload_date': upload_date, + 'view_count': view_count, + 'comment_count': comment_count, + 'formats': formats, + 'age_limit': age_limit, + } diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py new file mode 100644 index 000000000..4df7f4ddc --- /dev/null +++ b/youtube_dl/extractor/spiegel.py @@ -0,0 +1,159 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .nexx import ( + NexxIE, + NexxEmbedIE, +) +from .spiegeltv import SpiegeltvIE +from ..compat import compat_urlparse +from ..utils import ( + parse_duration, + strip_or_none, + unified_timestamp, +) + + +class SpiegelIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$' + _TESTS = [{ + 'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html', + 'md5': 'b57399839d055fccfeb9a0455c439868', + 'info_dict': { + 'id': '563747', + 'ext': 'mp4', + 'title': 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv', + 'description': 'md5:8029d8310232196eb235d27575a8b9f4', + 'duration': 49, + 'upload_date': '20130311', + 'timestamp': 1362994320, + }, + }, { + 'url': 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html', + 'md5': '5b6c2f4add9d62912ed5fc78a1faed80', + 'info_dict': { + 'id': '580988', + 'ext': 'mp4', + 'title': 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers', + 'description': 'md5:c2322b65e58f385a820c10fa03b2d088', + 'duration': 983, + 'upload_date': '20131115', + 'timestamp': 1384546642, + }, + }, { + 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html', + 'md5': '97b91083a672d72976faa8433430afb9', + 'info_dict': { + 'id': '601883', + 'ext': 'mp4', + 'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen.
Hier kommen seine Antworten auf die besten sechs Fragen.', + 'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"', + 'upload_date': '20140904', + 'timestamp': 1409834160, + } + }, { + 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html', + 'only_matching': True, + }, { + # nexx video + 'url': 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + metadata_url = 'http://www.spiegel.de/video/metadata/video-%s.json' % video_id + handle = self._request_webpage(metadata_url, video_id) + + # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html + if SpiegeltvIE.suitable(handle.geturl()): + return self.url_result(handle.geturl(), 'Spiegeltv') + + video_data = self._parse_json(self._webpage_read_content( + handle, metadata_url, video_id), video_id) + title = video_data['title'] + nexx_id = video_data['nexxOmniaId'] + domain_id = video_data.get('nexxOmniaDomain') or '748' + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'nexx:%s:%s' % (domain_id, nexx_id), + 'title': title, + 'description': strip_or_none(video_data.get('teaser')), + 'duration': parse_duration(video_data.get('duration')), + 'timestamp': unified_timestamp(video_data.get('datum')), + 'ie_key': NexxIE.ie_key(), + } + + +class SpiegelArticleIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' + IE_NAME = 'Spiegel:Article' + IE_DESC = 'Articles on spiegel.de' + _TESTS = [{ + 'url': 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html', + 'info_dict': { + 'id': '1516455', + 'ext': 'mp4', + 'title': 'Faszination Badminton: Nennt es bloß nicht Federball', + 'description': 're:^Patrick Kämnitz gehört.{100,}', + 'upload_date': '20140825', + }, + }, { + 'url': 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html', + 'info_dict': { + + }, + 'playlist_count': 6, + }, { + # Nexx iFrame embed + 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html', + 'info_dict': { + 'id': '161464', + 'ext': 'mp4', + 'title': 'Nervenkitzel Achterbahn', + 'alt_title': 'Karussellbauer in Deutschland', + 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc', + 'release_year': 2005, + 'creator': 'SPIEGEL TV', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 2761, + 'timestamp': 1394021479, + 'upload_date': '20140305', + }, + 'params': { + 'format': 'bestvideo', + 'skip_download': True, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + # Single video on top of the page + video_link = self._search_regex( + r'<a href="([^"]+)" onclick="return spOpenVideo\(this,', webpage, + 'video page URL', default=None) + if video_link: + video_url = compat_urlparse.urljoin( + self.http_scheme() + '//spiegel.de/', video_link) + return self.url_result(video_url) + + # Multiple embedded videos + embeds = re.findall( + r'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"', + webpage) + entries = [ + self.url_result(compat_urlparse.urljoin( + self.http_scheme() + '//spiegel.de/', embed_path)) + for embed_path in 
embeds] + if embeds: + return self.playlist_result(entries) + + return self.playlist_from_matches( + NexxEmbedIE._extract_urls(webpage), ie=NexxEmbedIE.ie_key()) diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dl/extractor/spiegeltv.py new file mode 100644 index 000000000..6ccf4c342 --- /dev/null +++ b/youtube_dl/extractor/spiegeltv.py @@ -0,0 +1,17 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from .nexx import NexxIE + + +class SpiegeltvIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)' + _TEST = { + 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/', + 'only_matching': True, + } + + def _real_extract(self, url): + return self.url_result( + 'https://api.nexx.cloud/v3/748/videos/byid/%s' + % self._match_id(url), ie=NexxIE.ie_key()) diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py new file mode 100644 index 000000000..21b93a5b3 --- /dev/null +++ b/youtube_dl/extractor/spike.py @@ -0,0 +1,57 @@ +from __future__ import unicode_literals + +from .mtv import MTVServicesInfoExtractor + + +class BellatorIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' + _TESTS = [{ + 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg', + 'info_dict': { + 'id': 'b55e434e-fde1-4a98-b7cc-92003a034de4', + 'ext': 'mp4', + 'title': 'Douglas Lima vs. Paul Daley - Round 1', + 'description': 'md5:805a8dd29310fd611d32baba2f767885', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page', + 'only_matching': True, + }] + + _FEED_URL = 'http://www.spike.com/feeds/mrss/' + _GEO_COUNTRIES = ['US'] + + +class ParamountNetworkIE(MTVServicesInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)' + _TESTS = [{ + 'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13', + 'info_dict': { + 'id': '37ace3a8-1df6-48be-85b8-38df8229e241', + 'ext': 'mp4', + 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. 
Jim Rash|Act 1', + 'description': 'md5:a739ca8f978a7802f67f8016d27ce114', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }] + + _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/' + _GEO_COUNTRIES = ['US'] + + def _extract_mgid(self, webpage): + root_data = self._parse_json(self._search_regex( + r'window\.__DATA__\s*=\s*({.+})', + webpage, 'data'), None) + + def find_sub_data(data, data_type): + return next(c for c in data['children'] if c.get('type') == data_type) + + c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer') + return c['props']['media']['video']['config']['uri'] diff --git a/youtube_dl/extractor/sport5.py b/youtube_dl/extractor/sport5.py new file mode 100644 index 000000000..a417b5a4e --- /dev/null +++ b/youtube_dl/extractor/sport5.py @@ -0,0 +1,92 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class Sport5IE(InfoExtractor): + _VALID_URL = r'https?://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)' + _TESTS = [ + { + 'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1', + 'info_dict': { + 'id': 's5-Y59xx1-GUh2', + 'ext': 'mp4', + 'title': 'ולנסיה-קורדובה 0:3', + 'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה', + 'duration': 228, + 'categories': list, + }, + 'skip': 'Blocked outside of Israel', + }, { + 'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE', + 'info_dict': { + 'id': 's5-SiXxx1-hKh2', + 'ext': 'mp4', + 'title': 'GOALS_CELTIC_270914.mp4', + 'description': '', + 'duration': 87, + 'categories': list, + }, + 'skip': 'Blocked outside of Israel', + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + media_id = mobj.group('id') + + webpage = self._download_webpage(url, media_id) + + video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id') + + metadata = self._download_xml( + 'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/%s/HDS/metadata.xml' % video_id, + video_id) + + error = metadata.find('./Error') + if error is not None: + raise ExtractorError( + '%s returned error: %s - %s' % ( + self.IE_NAME, + error.find('./Name').text, + error.find('./Description').text), + expected=True) + + title = metadata.find('./Title').text + description = metadata.find('./Description').text + duration = int(metadata.find('./Duration').text) + + posters_el = metadata.find('./PosterLinks') + thumbnails = [{ + 'url': thumbnail.text, + 'width': int(thumbnail.get('width')), + 'height': int(thumbnail.get('height')), + } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else [] + + categories_el = metadata.find('./Categories') + categories = [ + cat.get('name') for cat in categories_el.findall('./Category') + ] if categories_el is not None else [] + + formats = [{ + 'url': fmt.text, + 'ext': 'mp4', + 'vbr': int(fmt.get('bitrate')), + 'width': int(fmt.get('width')), + 'height': int(fmt.get('height')), + } for fmt in metadata.findall('./PlaybackLinks/FileURL')] + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnails': thumbnails, + 'duration': duration, + 'categories': categories, + 'formats': formats, + } diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dl/extractor/sportbox.py new file mode 100644 index 000000000..b9017fd2a --- /dev/null +++ b/youtube_dl/extractor/sportbox.py @@ -0,0 +1,99 @@ +# 
coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + int_or_none, + js_to_json, + merge_dicts, +) + + +class SportBoxIE(InfoExtractor): + _VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)' + _TESTS = [{ + 'url': 'http://news.sportbox.ru/vdl/player/ci/211355', + 'info_dict': { + 'id': '109158', + 'ext': 'mp4', + 'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 292, + 'view_count': int, + 'timestamp': 1426237001, + 'upload_date': '20150313', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://news.sportbox.ru/vdl/player?nid=370908&only_player=1&autostart=false&playeri=2&height=340&width=580', + 'only_matching': True, + }, { + 'url': 'https://news.sportbox.ru/vdl/player/media/193095', + 'only_matching': True, + }, { + 'url': 'https://news.sportbox.ru/vdl/player/media/109158', + 'only_matching': True, + }, { + 'url': 'https://matchtv.ru/vdl/player/media/109158', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"', + webpage) + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + sources = self._parse_json( + self._search_regex( + r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n', + webpage, 'sources'), + video_id, transform_source=js_to_json) + + formats = [] + for source in sources: + src = source.get('src') + if not src: + continue + if determine_ext(src) == 'm3u8': + formats.extend(self._extract_m3u8_formats( + src, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + else: + formats.append({ + 'url': src, + }) + self._sort_formats(formats) + + player = self._parse_json( + self._search_regex( + r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage, + 'player options', default='{}'), + video_id, transform_source=js_to_json) + media_id = player['mediaId'] + + info = self._search_json_ld(webpage, media_id, default={}) + + view_count = int_or_none(self._search_regex( + r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None)) + + return merge_dicts(info, { + 'id': media_id, + 'title': self._og_search_title(webpage, default=None) or media_id, + 'thumbnail': player.get('poster'), + 'duration': int_or_none(player.get('duration')), + 'view_count': view_count, + 'formats': formats, + }) diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py new file mode 100644 index 000000000..a3c35a899 --- /dev/null +++ b/youtube_dl/extractor/sportdeutschland.py @@ -0,0 +1,100 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + sanitized_Request, +) + + +class SportDeutschlandIE(InfoExtractor): + _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])' + _TESTS = [{ + 'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen', + 'info_dict': { + 'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen', + 'ext': 'mp4', + 'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen', + 'categories': ['Badminton'], 
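+ # NB: values given as a type (int) or an 're:'-prefixed string in + # info_dict are matched by type/regex by the test harness rather + # than compared literally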
+ 'view_count': int, + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': r're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV', + 'timestamp': int, + 'upload_date': 're:^201408[23][0-9]$', + }, + 'params': { + 'skip_download': 'Live stream', + }, + }, { + 'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs', + 'info_dict': { + 'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs', + 'ext': 'mp4', + 'upload_date': '20140825', + 'description': 'md5:60a20536b57cee7d9a4ec005e8687504', + 'timestamp': 1408976060, + 'duration': 2732, + 'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee', + 'thumbnail': r're:^https?://.*\.jpg$', + 'view_count': int, + 'categories': ['Li-Ning Badminton WM 2014'], + } + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + sport_id = mobj.group('sport') + + api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % ( + sport_id, video_id) + req = sanitized_Request(api_url, headers={ + 'Accept': 'application/vnd.vidibus.v2.html+json', + 'Referer': url, + }) + data = self._download_json(req, video_id) + + asset = data['asset'] + categories = [data['section']['title']] + + formats = [] + smil_url = asset['video'] + if '.smil' in smil_url: + m3u8_url = smil_url.replace('.smil', '.m3u8') + formats.extend( + self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')) + + smil_doc = self._download_xml( + smil_url, video_id, note='Downloading SMIL metadata') + base_url_el = smil_doc.find('./head/meta') + # explicit None test: a childless ElementTree element is falsy + if base_url_el is not None: + base_url = base_url_el.attrib['base'] + formats.extend([{ + 'format_id': 'rtmp', + 'url': base_url if base_url_el is not None else n.attrib['src'], + 'play_path': n.attrib['src'], + 'ext': 'flv', + 'preference': -100, + 'format_note': 'Seems to fail at example stream', + } for n in smil_doc.findall('./body/video')]) + else: + formats.append({'url': smil_url}) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': asset['title'], + 'thumbnail': asset.get('image'), + 'description': asset.get('teaser'), + 'duration': asset.get('duration'), + 'categories': categories, + 'view_count': asset.get('views'), + 'rtmp_live': asset.get('live'), + 'timestamp': parse_iso8601(asset.get('date')), + } diff --git a/youtube_dl/extractor/springboardplatform.py b/youtube_dl/extractor/springboardplatform.py new file mode 100644 index 000000000..07d99b579 --- /dev/null +++ b/youtube_dl/extractor/springboardplatform.py @@ -0,0 +1,125 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + xpath_attr, + xpath_text, + xpath_element, + unescapeHTML, + unified_timestamp, +) + + +class SpringboardPlatformIE(InfoExtractor): + _VALID_URL = r'''(?x) + https?:// + cms\.springboardplatform\.com/ + (?: + (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)| + xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+) + ) + ''' + _TESTS = [{ + 'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1', + 'md5': '5c3cb7b5c55740d482561099e920f192', + 'info_dict': { + 'id': '981017', + 'ext': 'mp4', + 'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX', + 'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX', + 'thumbnail':
r're:^https?://.*\.jpg$', + 'timestamp': 1409132328, + 'upload_date': '20140827', + 'duration': 193, + }, + }, { + 'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1', + 'only_matching': True, + }, { + 'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10', + 'only_matching': True, + }, { + 'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1', + webpage)] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') or mobj.group('id_2') + index = mobj.group('index') or mobj.group('index_2') + + video = self._download_xml( + 'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s' + % (index, video_id), video_id) + + item = xpath_element(video, './/item', 'item', fatal=True) + + content = xpath_element( + item, './{http://search.yahoo.com/mrss/}content', 'content', + fatal=True) + title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True)) + + video_url = content.attrib['url'] + + if 'error_video.mp4' in video_url: + raise ExtractorError( + 'Video %s no longer exists' % video_id, expected=True) + + duration = int_or_none(content.get('duration')) + tbr = int_or_none(content.get('bitrate')) + filesize = int_or_none(content.get('fileSize')) + width = int_or_none(content.get('width')) + height = int_or_none(content.get('height')) + + description = unescapeHTML(xpath_text( + item, './description', 'description')) + thumbnail = xpath_attr( + item, './{http://search.yahoo.com/mrss/}thumbnail', 'url', + 'thumbnail') + + timestamp = unified_timestamp(xpath_text( + item, './{http://cms.springboardplatform.com/namespaces.html}created', + 'timestamp')) + + formats = [{ + 'url': video_url, + 'format_id': 'http', + 'tbr': tbr, + 'filesize': filesize, + 'width': width, + 'height': height, + }] + + m3u8_format = formats[0].copy() + m3u8_format.update({ + 'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8', + 'ext': 'mp4', + 'format_id': 'hls', + 'protocol': 'm3u8_native', + }) + formats.append(m3u8_format) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'timestamp': timestamp, + 'duration': duration, + 'formats': formats, + } diff --git a/youtube_dl/extractor/sprout.py b/youtube_dl/extractor/sprout.py new file mode 100644 index 000000000..8467bf49d --- /dev/null +++ b/youtube_dl/extractor/sprout.py @@ -0,0 +1,52 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .adobepass import AdobePassIE +from ..utils import ( + extract_attributes, + update_url_query, + smuggle_url, +) + + +class SproutIE(AdobePassIE): + _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)' + _TEST = { + 'url': 'http://www.sproutonline.com/watch/cowboy-adventure', + 'md5': '74bf14128578d1e040c3ebc82088f45f', + 'info_dict': { + 'id': '9dexnwtmh8_X', + 'ext': 'mp4', + 'title': 'A Cowboy Adventure', + 'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.', + 'timestamp': 1437758640, + 'upload_date': '20150724', + 'uploader': 'NBCU-SPROUT-NEW', + } + } + + def _real_extract(self, url): + video_id = 
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        video_component = self._search_regex(
+            r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
+            webpage, 'video component', default=None)
+        if video_component:
+            options = self._parse_json(extract_attributes(
+                video_component)['data-options'], video_id)
+            theplatform_url = options['video']
+            query = {
+                'mbr': 'true',
+                'manifest': 'm3u',
+            }
+            if options.get('protected'):
+                query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
+            theplatform_url = smuggle_url(update_url_query(
+                theplatform_url, query), {'force_smil_url': True})
+        else:
+            iframe = self._search_regex(
+                r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
+                webpage, 'iframe')
+            theplatform_url = extract_attributes(iframe)['src']
+
+        return self.url_result(theplatform_url, 'ThePlatform')
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dl/extractor/srgssr.py
new file mode 100644
index 000000000..170dce87f
--- /dev/null
+++ b/youtube_dl/extractor/srgssr.py
@@ -0,0 +1,186 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+from ..utils import (
+    ExtractorError,
+    parse_iso8601,
+    qualities,
+)
+
+
+class SRGSSRIE(InfoExtractor):
+    _VALID_URL = r'(?:https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|srgssr):(?P<bu>srf|rts|rsi|rtr|swi):(?:[^:]+:)?(?P<type>video|audio):(?P<id>[0-9a-f\-]{36}|\d+)'
+    _GEO_BYPASS = False
+    _GEO_COUNTRIES = ['CH']
+
+    _ERRORS = {
+        'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
+        'AGERATING18': 'To protect children under the age of 18, this video is only available between 11 p.m. and 5 a.m.',
+        # 'ENDDATE': 'For legal reasons, this video was only available for a specified period of time.',
+        'GEOBLOCK': 'For legal reasons, this video is only available in Switzerland.',
+        'LEGAL': 'The video cannot be transmitted for legal reasons.',
+        'STARTDATE': 'This video is not yet available. Please try again later.',
+    }
+
+    def _get_tokenized_src(self, url, video_id, format_id):
+        sp = compat_urllib_parse_urlparse(url).path.split('/')
+        token = self._download_json(
+            'http://tp.srgssr.ch/akahd/token?acl=/%s/%s/*' % (sp[1], sp[2]),
+            video_id, 'Downloading %s token' % format_id, fatal=False) or {}
+        auth_params = token.get('token', {}).get('authparams')
+        if auth_params:
+            url += '?' + auth_params
+        return url
+
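+    # The integration layer (IL) API describes the requested media; a 'block'
+    # field names the reason playback is refused (see _ERRORS above).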
+    def get_media_data(self, bu, media_type, media_id):
+        media_data = self._download_json(
+            'http://il.srgssr.ch/integrationlayer/1.0/ue/%s/%s/play/%s.json' % (bu, media_type, media_id),
+            media_id)[media_type.capitalize()]
+
+        if media_data.get('block') and media_data['block'] in self._ERRORS:
+            message = self._ERRORS[media_data['block']]
+            if media_data['block'] == 'GEOBLOCK':
+                self.raise_geo_restricted(
+                    msg=message, countries=self._GEO_COUNTRIES)
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, message), expected=True)
+
+        return media_data
+
+    def _real_extract(self, url):
+        bu, media_type, media_id = re.match(self._VALID_URL, url).groups()
+
+        media_data = self.get_media_data(bu, media_type, media_id)
+
+        metadata = media_data['AssetMetadatas']['AssetMetadata'][0]
+        title = metadata['title']
+        description = metadata.get('description')
+        created_date = media_data.get('createdDate') or metadata.get('createdDate')
+        timestamp = parse_iso8601(created_date)
+
+        thumbnails = [{
+            'id': image.get('id'),
+            'url': image['url'],
+        } for image in media_data.get('Image', {}).get('ImageRepresentations', {}).get('ImageRepresentation', [])]
+
+        preference = qualities(['LQ', 'MQ', 'SD', 'HQ', 'HD'])
+        formats = []
+        for source in media_data.get('Playlists', {}).get('Playlist', []) + media_data.get('Downloads', {}).get('Download', []):
+            protocol = source.get('@protocol')
+            for asset in source['url']:
+                asset_url = asset['text']
+                quality = asset['@quality']
+                format_id = '%s-%s' % (protocol, quality)
+                if protocol.startswith('HTTP-HDS') or protocol.startswith('HTTP-HLS'):
+                    asset_url = self._get_tokenized_src(asset_url, media_id, format_id)
+                    if protocol.startswith('HTTP-HDS'):
+                        formats.extend(self._extract_f4m_formats(
+                            asset_url + ('?' if '?' not in asset_url else '&') + 'hdcore=3.4.0',
+                            media_id, f4m_id=format_id, fatal=False))
+                    elif protocol.startswith('HTTP-HLS'):
+                        formats.extend(self._extract_m3u8_formats(
+                            asset_url, media_id, 'mp4', 'm3u8_native',
+                            m3u8_id=format_id, fatal=False))
+                else:
+                    formats.append({
+                        'format_id': format_id,
+                        'url': asset_url,
+                        'preference': preference(quality),
+                        'ext': 'flv' if protocol == 'RTMP' else None,
+                    })
+        self._sort_formats(formats)
+
+        return {
+            'id': media_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'thumbnails': thumbnails,
+            'formats': formats,
+        }
+
+
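+# The play sites only embed media that SRGSSRIE already handles, so the
+# extractor below simply maps a play URL onto the corresponding srgssr: URN.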
+class SRGSSRPlayIE(InfoExtractor):
+    IE_DESC = 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:(?:www|play)\.)?
+                        (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
+                        (?:
+                            [^/]+/(?P<type>video|audio)/[^?]+|
+                            popup(?P<type_2>video|audio)player
+                        )
+                        \?id=(?P<id>[0-9a-f\-]{36}|\d+)
+                    '''
+
+    _TESTS = [{
+        'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+        'md5': 'da6b5b3ac9fa4761a942331cef20fcb3',
+        'info_dict': {
+            'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
+            'ext': 'mp4',
+            'upload_date': '20130701',
+            'title': 'Snowden beantragt Asyl in Russland',
+            'timestamp': 1372713995,
+        }
+    }, {
+        # No Speichern (Save) button
+        'url': 'http://www.srf.ch/play/tv/top-gear/video/jaguar-xk120-shadow-und-tornado-dampflokomotive?id=677f5829-e473-4823-ac83-a1087fe97faa',
+        'md5': '0a274ce38fda48c53c01890651985bc6',
+        'info_dict': {
+            'id': '677f5829-e473-4823-ac83-a1087fe97faa',
+            'ext': 'flv',
+            'upload_date': '20130710',
+            'title': 'Jaguar XK120, Shadow und Tornado-Dampflokomotive',
+            'description': 'md5:88604432b60d5a38787f152dec89cd56',
+            'timestamp': 1373493600,
+        },
+    }, {
+        'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
+        'info_dict': {
+            'id': '63cb0778-27f8-49af-9284-8c7a8c6d15fc',
+            'ext': 'mp3',
+            'upload_date': '20151013',
+            'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
+            'timestamp': 1444750398,
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
+        'md5': '67a2a9ae4e8e62a68d0e9820cc9782df',
+        'info_dict': {
+            'id': '6348260',
+            'display_id': '6348260',
+            'ext': 'mp4',
+            'duration': 1796,
+            'title': 'Le 19h30',
+            'description': '',
+            'uploader': '19h30',
+            'upload_date': '20141201',
+            'timestamp': 1417458600,
+            'thumbnail': r're:^https?://.*\.image',
+            'view_count': int,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        }
+    }, {
+        'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        bu = mobj.group('bu')
+        media_type = mobj.group('type') or mobj.group('type_2')
+        media_id = mobj.group('id')
+        # other info can be extracted from url + '&layout=json'
+        return self.url_result('srgssr:%s:%s:%s' % (bu[:3], media_type, media_id), 'SRGSSR')
diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py
new file mode 100644
index 000000000..28baf901c
--- /dev/null
+++ b/youtube_dl/extractor/srmediathek.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .ard import ARDMediathekIE
+from ..utils import (
+    ExtractorError,
+    get_element_by_attribute,
+)
+
+
+class SRMediathekIE(ARDMediathekIE):
+    IE_NAME = 'sr:mediathek'
+    IE_DESC = 'Saarländischer Rundfunk'
+    _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
+
+    _TESTS = [{
+        'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
+        'info_dict': {
+            'id': '28455',
+            'ext': 'mp4',
+            'title': 'sportarena (26.10.2014)',
+            'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        },
+        'skip': 'no longer available',
+    }, {
+        'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
+        'info_dict': {
+            'id': '37682',
+            'ext': 'mp4',
+            'title': 'Love, Cakes and Rock\'n\'Roll',
+            'description': 'md5:18bf9763631c7d326c22603681e1123d',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        if '>Der gewünschte Beitrag ist leider nicht mehr verfügbar.<' in webpage:
+            raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
+        media_collection_url = self._search_regex(
+            r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
+        info = self._extract_media_info(media_collection_url, webpage, video_id)
+        info.update({
+            'id': video_id,
+            'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+        })
+        return info
diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dl/extractor/stanfordoc.py
new file mode 100644
index 000000000..ae3dd1380
--- /dev/null
+++ b/youtube_dl/extractor/stanfordoc.py
@@ -0,0 +1,91 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    orderedSet,
+    unescapeHTML,
+)
+
+
+class StanfordOpenClassroomIE(InfoExtractor):
+    IE_NAME = 'stanfordoc'
+    IE_DESC = 'Stanford Open ClassRoom'
+    _VALID_URL = r'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
+    _TEST = {
+        'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
+        'md5': '544a9468546059d4e80d76265b0443b8',
+        'info_dict': {
+            'id': 'PracticalUnix_intro-environment',
+            'ext': 'mp4',
+            'title': 'Intro Environment',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+
+        if mobj.group('course') and mobj.group('video'):  # A specific video
+            course = mobj.group('course')
+            video = mobj.group('video')
+            info = {
+                'id': course + '_' + video,
+                'uploader': None,
+                'upload_date': None,
+            }
+
+            baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
+            xmlUrl = baseUrl + video + '.xml'
+            mdoc = self._download_xml(xmlUrl, info['id'])
+            try:
+                info['title'] = mdoc.findall('./title')[0].text
+                info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
+            except IndexError:
+                raise ExtractorError('Invalid metadata XML file')
+            return info
+        elif mobj.group('course'):  # A course page
+            course = mobj.group('course')
+            info = {
+                'id': course,
+                '_type': 'playlist',
+                'uploader': None,
+                'upload_date': None,
+            }
+
+            coursepage = self._download_webpage(
+                url, info['id'],
+                note='Downloading course info page',
+                errnote='Unable to download course info page')
+
+            info['title'] = self._html_search_regex(
+                r'<h1>([^<]+)</h1>', coursepage, 'title', default=info['id'])
+
+            info['description'] = self._html_search_regex(
+                r'(?s)<description>([^<]+)</description>',
+                coursepage, 'description', fatal=False)
+
+            links = orderedSet(re.findall(r'<a href="(VideoPage\.php\?[^"]+)">', coursepage))
+            info['entries'] = [self.url_result(
+                'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+            ) for l in links]
+            return info
+        else:  # Root page
+            info = {
+                'id': 'Stanford OpenClassroom',
+                '_type': 'playlist',
+                'uploader': None,
+                'upload_date': None,
+            }
+            info['title'] = info['id']
+
+            rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
+            rootpage = self._download_webpage(rootURL, info['id'],
+                                              errnote='Unable to download course info page')
+
+            links = orderedSet(re.findall(r'<a href="(CoursePage\.php\?[^"]+)">', rootpage))
+            info['entries'] = [self.url_result(
+                'http://openclassroom.stanford.edu/MainFolder/%s' % unescapeHTML(l)
+            ) for l in links]
+            return info
diff --git a/youtube_dl/extractor/steam.py b/youtube_dl/extractor/steam.py
new file mode 100644
index 000000000..a6a191ceb
--- /dev/null
+++ b/youtube_dl/extractor/steam.py
@@ -0,0 +1,149 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    extract_attributes,
+    ExtractorError,
+    get_element_by_class,
+    js_to_json,
+)
+
+
+class SteamIE(InfoExtractor):
+    _VALID_URL = r"""(?x)
+        https?://store\.steampowered\.com/
+            (agecheck/)?
+            (?P<urltype>video|app)/ #If the page is only for videos or for a game
+            (?P<gameID>\d+)/?
+            (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
+        |
+        https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
+    """
+    _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
+    _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
+    _TESTS = [{
+        'url': 'http://store.steampowered.com/video/105600/',
+        'playlist': [
+            {
+                'md5': '6a294ee0c4b1f47f5bb76a65e31e3592',
+                'info_dict': {
+                    'id': '2040428',
+                    'ext': 'mp4',
+                    'title': 'Terraria 1.3 Trailer',
+                    'playlist_index': 1,
+                }
+            },
+            {
+                'md5': '911672b20064ca3263fa89650ba5a7aa',
+                'info_dict': {
+                    'id': '2029566',
+                    'ext': 'mp4',
+                    'title': 'Terraria 1.2 Trailer',
+                    'playlist_index': 2,
+                }
+            }
+        ],
+        'info_dict': {
+            'id': '105600',
+            'title': 'Terraria',
+        },
+        'params': {
+            'playlistend': 2,
+        }
+    }, {
+        'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
+        'info_dict': {
+            'id': 'X8kpJBlzD2E',
+            'ext': 'mp4',
+            'upload_date': '20140617',
+            'title': 'FRONTIERS - Trapping',
+            'description': 'md5:bf6f7f773def614054089e5769c12a6e',
+            'uploader': 'AAD Productions',
+            'uploader_id': 'AtomicAgeDogGames',
+        }
+    }]
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        fileID = m.group('fileID')
+        if fileID:
+            videourl = url
+            playlist_id = fileID
+        else:
+            gameID = m.group('gameID')
+            playlist_id = gameID
+            videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
+
+        self._set_cookie('steampowered.com', 'mature_content', '1')
+
+        webpage = self._download_webpage(videourl, playlist_id)
+
+        if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
+            videourl = self._AGECHECK_TEMPLATE % playlist_id
+            self.report_age_confirmation()
+            webpage = self._download_webpage(videourl, playlist_id)
+
+        flash_vars = self._parse_json(self._search_regex(
+            r'(?s)rgMovieFlashvars\s*=\s*({.+?});', webpage,
+            'flash vars'), playlist_id, js_to_json)
+
+        playlist_title = None
+        entries = []
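+        # Shared-file (workshop) pages wrap a YouTube embed, which is delegated
+        # to the YouTube extractor; store pages host the movies themselves.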
+        if fileID:
+            playlist_title = get_element_by_class('workshopItemTitle', webpage)
+            for movie in flash_vars.values():
+                if not movie:
+                    continue
+                youtube_id = movie.get('YOUTUBE_VIDEO_ID')
+                if not youtube_id:
+                    continue
+                entries.append({
+                    '_type': 'url',
+                    'url': youtube_id,
+                    'ie_key': 'Youtube',
+                })
+        else:
+            playlist_title = get_element_by_class('apphub_AppName', webpage)
+            for movie_id, movie in flash_vars.items():
+                if not movie:
+                    continue
+                video_id = self._search_regex(r'movie_(\d+)', movie_id, 'video id', fatal=False)
+                title = movie.get('MOVIE_NAME')
+                if not title or not video_id:
+                    continue
+                entry = {
+                    'id': video_id,
+                    'title': title.replace('+', ' '),
+                }
+                formats = []
+                flv_url = movie.get('FILENAME')
+                if flv_url:
+                    formats.append({
+                        'format_id': 'flv',
+                        'url': flv_url,
+                    })
+                highlight_element = self._search_regex(
+                    r'(<div[^>]+id="highlight_movie_%s"[^>]+>)' % video_id,
+                    webpage, 'highlight element', fatal=False)
+                if highlight_element:
+                    highlight_attribs = extract_attributes(highlight_element)
+                    if highlight_attribs:
+                        entry['thumbnail'] = highlight_attribs.get('data-poster')
+                        for quality in ('', '-hd'):
+                            for ext in ('webm', 'mp4'):
+                                video_url = highlight_attribs.get('data-%s%s-source' % (ext, quality))
+                                if video_url:
+                                    formats.append({
+                                        'format_id': ext + quality,
+                                        'url': video_url,
+                                    })
+                if not formats:
+                    continue
+                entry['formats'] = formats
+                entries.append(entry)
+        if not entries:
+            raise ExtractorError('Could not find any videos')
+
+        return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/stitcher.py b/youtube_dl/extractor/stitcher.py
new file mode 100644
index 000000000..97d1ff681
--- /dev/null
+++ b/youtube_dl/extractor/stitcher.py
@@ -0,0 +1,81 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    determine_ext,
+    int_or_none,
+    js_to_json,
+    unescapeHTML,
+)
+
+
+class StitcherIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?stitcher\.com/podcast/(?:[^/]+/)+e/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)'
+    _TESTS = [{
+        'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
+        'md5': '391dd4e021e6edeb7b8e68fbf2e9e940',
+        'info_dict': {
+            'id': '40789481',
+            'ext': 'mp3',
+            'title': 'Machine Learning Mastery and Cancer Clusters',
+            'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3',
+            'duration': 1604,
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+    }, {
+        'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
+        'info_dict': {
+            'id': '40846275',
+            'display_id': 'the-rare-hourlong-comedy-plus',
+            'ext': 'mp3',
+            'title': "The CW's 'Crazy Ex-Girlfriend'",
+            'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
+            'duration': 2235,
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # escaped title
+        'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        audio_id = mobj.group('id')
+        display_id = mobj.group('display_id') or audio_id
+
+        webpage = self._download_webpage(url, display_id)
+
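+        # the episode metadata is inlined as a JS object literal
+        # (var stitcher(Config) = {...}); js_to_json() makes it parseable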
+        episode = self._parse_json(
+            js_to_json(self._search_regex(
+                r'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage, 'episode config')),
+            display_id)['config']['episode']
+
+        title = unescapeHTML(episode['title'])
+        formats = [{
+            'url': episode[episode_key],
+            'ext': determine_ext(episode[episode_key]) or 'mp3',
+            'vcodec': 'none',
+        } for episode_key in ('episodeURL',) if episode.get(episode_key)]
+        description = self._search_regex(
+            r'Episode Info:\s*</span>([^<]+)<', webpage, 'description', fatal=False)
+        duration = int_or_none(episode.get('duration'))
+        thumbnail = episode.get('episodeImage')
+
+        return {
+            'id': audio_id,
+            'display_id': display_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'thumbnail': thumbnail,
+            'formats': formats,
+        }
diff --git a/youtube_dl/extractor/streamable.py b/youtube_dl/extractor/streamable.py
new file mode 100644
index 000000000..34725274e
--- /dev/null
+++ b/youtube_dl/extractor/streamable.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    float_or_none,
+    int_or_none,
+)
+
+
+class StreamableIE(InfoExtractor):
+    _VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)'
+    _TESTS = [
+        {
+            'url': 'https://streamable.com/dnd1',
+            'md5': '3e3bc5ca088b48c2d436529b64397fef',
+            'info_dict': {
+                'id': 'dnd1',
+                'ext': 'mp4',
+                'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol',
+                'thumbnail': r're:https?://.*\.jpg$',
+                'uploader': 'teabaker',
+                'timestamp': 1454964157.35115,
+                'upload_date': '20160208',
+                'duration': 61.516,
+                'view_count': int,
+            }
+        },
+        # older video without bitrate, width/height, etc. info
+        {
+            'url': 'https://streamable.com/moo',
+            'md5': '2cf6923639b87fba3279ad0df3a64e73',
+            'info_dict': {
+                'id': 'moo',
+                'ext': 'mp4',
+                'title': '"Please don\'t eat me!"',
+                'thumbnail': r're:https?://.*\.jpg$',
+                'timestamp': 1426115495,
+                'upload_date': '20150311',
+                'duration': 12,
+                'view_count': int,
+            }
+        },
+        {
+            'url': 'https://streamable.com/e/dnd1',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://streamable.com/s/okkqk/drxjds',
+            'only_matching': True,
+        }
+    ]
+
+    @staticmethod
+    def _extract_url(webpage):
+        mobj = re.search(
+            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).+))(?P=q1)',
+            webpage)
+        if mobj:
+            return mobj.group('src')
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        # Note: Using the ajax API, as the public Streamable API doesn't seem
+        # to return video info like the title properly sometimes, and doesn't
+        # include info like the video duration
+        video = self._download_json(
+            'https://ajax.streamable.com/videos/%s' % video_id, video_id)
+
+        # Status codes:
+        # 0 The video is being uploaded
+        # 1 The video is being processed
+        # 2 The video has at least one file ready
+        # 3 The video is unavailable due to an error
+        status = video.get('status')
+        if status != 2:
+            raise ExtractorError(
+                'This video is currently unavailable. It may still be uploading or processing.',
+                expected=True)
+
+        title = video.get('reddit_title') or video['title']
+
+        formats = []
+        for key, info in video['files'].items():
+            if not info.get('url'):
+                continue
+            formats.append({
+                'format_id': key,
+                'url': self._proto_relative_url(info['url']),
+                'width': int_or_none(info.get('width')),
+                'height': int_or_none(info.get('height')),
+                'filesize': int_or_none(info.get('size')),
+                'fps': int_or_none(info.get('framerate')),
+                'vbr': float_or_none(info.get('bitrate'), 1000)
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': self._proto_relative_url(video.get('thumbnail_url')),
+            'uploader': video.get('owner', {}).get('user_name'),
+            'timestamp': float_or_none(video.get('date_added')),
+            'duration': float_or_none(video.get('duration')),
+            'view_count': int_or_none(video.get('plays')),
+            'formats': formats
+        }
diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py
new file mode 100644
index 000000000..f1e17dd88
--- /dev/null
+++ b/youtube_dl/extractor/streamango.py
@@ -0,0 +1,128 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+    determine_ext,
+    ExtractorError,
+    int_or_none,
+    js_to_json,
+)
+
+
+class StreamangoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P<id>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
+        'md5': 'e992787515a182f55e38fc97588d802a',
+        'info_dict': {
+            'id': 'clapasobsptpkdfe',
+            'ext': 'mp4',
+            'title': '20170315_150006.mp4',
+        }
+    }, {
+        # no og:title
+        'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
+        'info_dict': {
+            'id': 'foqebrpftarclpob',
+            'ext': 'mp4',
+            'title': 'foqebrpftarclpob',
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'gone',
+    }, {
+        'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://streamcherry.com/f/clapasobsptpkdfe/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        def decrypt_src(encoded, val):
+            ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
+            encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
+            decoded = ''
+            sm = [None] * 4
+            i = 0
+            str_len = len(encoded)
+            while i < str_len:
+                for j in range(4):
+                    sm[j % 4] = ALPHABET.index(encoded[i])
+                    i += 1
+                char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val
+                decoded += compat_chr(char_code)
+                if sm[2] != 0x40:
+                    char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)
+                    decoded += compat_chr(char_code)
+                if sm[3] != 0x40:
+                    char_code = ((sm[2] & 0x3) << 0x6) | sm[3]
+                    decoded += compat_chr(char_code)
+            return decoded
+
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage, default=video_id)
+
+        formats = []
+        for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
+            mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
+            if mobj is None:
+                continue
+
+            format_ = format_.replace(mobj.group(0), '')
+
+            video = self._parse_json(
+                format_, video_id, transform_source=js_to_json,
+                fatal=False) or {}
+
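+            # what remains of the call should be an ('<encoded src>', <numeric key>)
+            # argument pair; grab both so decrypt_src() can reverse the obfuscation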
r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)', + mobj.group(1)) + if mobj is None: + continue + + src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val'))) + if not src: + continue + + ext = determine_ext(src, default_ext=None) + if video.get('type') == 'application/dash+xml' or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + src, video_id, mpd_id='dash', fatal=False)) + else: + formats.append({ + 'url': src, + 'ext': ext or 'mp4', + 'width': int_or_none(video.get('width')), + 'height': int_or_none(video.get('height')), + 'tbr': int_or_none(video.get('bitrate')), + }) + + if not formats: + error = self._search_regex( + r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage, + 'error', default=None) + if not error and '>Sorry' in webpage: + error = 'Video %s is not available' % video_id + if error: + raise ExtractorError(error, expected=True) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'url': url, + 'title': title, + 'formats': formats, + } diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py new file mode 100644 index 000000000..b97bb4374 --- /dev/null +++ b/youtube_dl/extractor/streamcloud.py @@ -0,0 +1,78 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + urlencode_postdata, +) + + +class StreamcloudIE(InfoExtractor): + IE_NAME = 'streamcloud.eu' + _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?' + + _TESTS = [{ + 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', + 'md5': '6bea4c7fa5daaacc2a946b7146286686', + 'info_dict': { + 'id': 'skp9j99s4bpz', + 'ext': 'mp4', + 'title': 'youtube-dl test video \'/\\ ä ↭', + }, + 'skip': 'Only available from the EU' + }, { + 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + url = 'http://streamcloud.eu/%s' % video_id + + orig_webpage = self._download_webpage(url, video_id) + + if '>File Not Found<' in orig_webpage: + raise ExtractorError( + 'Video %s does not exist' % video_id, expected=True) + + fields = re.findall(r'''(?x)<input\s+ + type="(?:hidden|submit)"\s+ + name="([^"]+)"\s+ + (?:id="[^"]+"\s+)? 
+ value="([^"]*)" + ''', orig_webpage) + + self._sleep(6, video_id) + + webpage = self._download_webpage( + url, video_id, data=urlencode_postdata(fields), headers={ + b'Content-Type': b'application/x-www-form-urlencoded', + }) + + try: + title = self._html_search_regex( + r'<h1[^>]*>([^<]+)<', webpage, 'title') + video_url = self._search_regex( + r'file:\s*"([^"]+)"', webpage, 'video URL') + except ExtractorError: + message = self._html_search_regex( + r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>', + webpage, 'message', default=None, group='message') + if message: + raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) + raise + thumbnail = self._search_regex( + r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False) + + return { + 'id': video_id, + 'title': title, + 'url': video_url, + 'thumbnail': thumbnail, + 'http_headers': { + 'Referer': url, + }, + } diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dl/extractor/streamcz.py new file mode 100644 index 000000000..58e0b4c80 --- /dev/null +++ b/youtube_dl/extractor/streamcz.py @@ -0,0 +1,105 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import hashlib +import time + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + sanitized_Request, +) + + +def _get_api_key(api_path): + if api_path.endswith('?'): + api_path = api_path[:-1] + + api_key = 'fb5f58a820353bd7095de526253c14fd' + a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600))) + return hashlib.md5(a.encode('ascii')).hexdigest() + + +class StreamCZIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)' + _API_URL = 'http://www.stream.cz/API' + + _TESTS = [{ + 'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', + 'md5': '934bb6a6d220d99c010783c9719960d5', + 'info_dict': { + 'id': '765767', + 'ext': 'mp4', + 'title': 'Peklo na talíři: Éčka pro děti', + 'description': 'Taška s grónskou pomazánkou a další pekelnosti ZDE', + 'thumbnail': 're:^http://im.stream.cz/episode/52961d7e19d423f8f06f0100', + 'duration': 256, + }, + }, { + 'url': 'http://www.stream.cz/blanik/10002447-tri-roky-pro-mazanka', + 'md5': '849a88c1e1ca47d41403c2ba5e59e261', + 'info_dict': { + 'id': '10002447', + 'ext': 'mp4', + 'title': 'Kancelář Blaník: Tři roky pro Mazánka', + 'description': 'md5:3862a00ba7bf0b3e44806b544032c859', + 'thumbnail': 're:^http://im.stream.cz/episode/537f838c50c11f8d21320000', + 'duration': 368, + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + api_path = '/episode/%s' % video_id + + req = sanitized_Request(self._API_URL + api_path) + req.add_header('Api-Password', _get_api_key(api_path)) + data = self._download_json(req, video_id) + + formats = [] + for quality, video in enumerate(data['video_qualities']): + for f in video['formats']: + typ = f['type'].partition('/')[2] + qlabel = video.get('quality_label') + formats.append({ + 'format_note': '%s-%s' % (qlabel, typ) if qlabel else typ, + 'format_id': '%s-%s' % (typ, f['quality']), + 'url': f['source'], + 'height': int_or_none(f['quality'].rstrip('p')), + 'quality': quality, + }) + self._sort_formats(formats) + + image = data.get('image') + if image: + thumbnail = self._proto_relative_url( + image.replace('{width}', '1240').replace('{height}', '697'), + scheme='http:', + ) + else: + thumbnail = None + + stream = data.get('_embedded', {}).get('stream:show', {}).get('name') + if stream: + title = '%s: %s' % (stream, data['name']) + else: + title 
+
+        subtitles = {}
+        srt_url = data.get('subtitles_srt')
+        if srt_url:
+            subtitles['cs'] = [{
+                'ext': 'srt',
+                'url': srt_url,
+            }]
+
+        return {
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+            'formats': formats,
+            'description': data.get('web_site_text'),
+            'duration': int_or_none(data.get('duration')),
+            'view_count': int_or_none(data.get('views')),
+            'subtitles': subtitles,
+        }
diff --git a/youtube_dl/extractor/streetvoice.py b/youtube_dl/extractor/streetvoice.py
new file mode 100644
index 000000000..91612c7f2
--- /dev/null
+++ b/youtube_dl/extractor/streetvoice.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import unified_strdate
+
+
+class StreetVoiceIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:.+?\.)?streetvoice\.com/[^/]+/songs/(?P<id>[0-9]+)'
+    _TESTS = [{
+        'url': 'http://streetvoice.com/skippylu/songs/94440/',
+        'md5': '15974627fc01a29e492c98593c2fd472',
+        'info_dict': {
+            'id': '94440',
+            'ext': 'mp3',
+            'title': '輸',
+            'description': 'Crispy脆樂團 - 輸',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 260,
+            'upload_date': '20091018',
+            'uploader': 'Crispy脆樂團',
+            'uploader_id': '627810',
+        }
+    }, {
+        'url': 'http://tw.streetvoice.com/skippylu/songs/94440/',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        song_id = self._match_id(url)
+
+        song = self._download_json(
+            'https://streetvoice.com/api/v1/public/song/%s/' % song_id, song_id, data=b'')
+
+        title = song['name']
+        author = song['user']['nickname']
+
+        return {
+            'id': song_id,
+            'url': song['file'],
+            'title': title,
+            'description': '%s - %s' % (author, title),
+            'thumbnail': self._proto_relative_url(song.get('image'), 'http:'),
+            'duration': song.get('length'),
+            'upload_date': unified_strdate(song.get('created_at')),
+            'uploader': author,
+            'uploader_id': compat_str(song['user']['id']),
+        }
diff --git a/youtube_dl/extractor/stretchinternet.py b/youtube_dl/extractor/stretchinternet.py
new file mode 100644
index 000000000..ae2ac1b42
--- /dev/null
+++ b/youtube_dl/extractor/stretchinternet.py
@@ -0,0 +1,48 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class StretchInternetIE(InfoExtractor):
+    _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/portal\.htm\?.*?\beventId=(?P<id>\d+)'
+    _TEST = {
+        'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=313900&streamType=video',
+        'info_dict': {
+            'id': '313900',
+            'ext': 'mp4',
+            'title': 'Augustana (S.D.) Baseball vs University of Mary',
+            'description': 'md5:7578478614aae3bdd4a90f578f787438',
+            'timestamp': 1490468400,
+            'upload_date': '20170325',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        stream = self._download_json(
+            'https://neo-client.stretchinternet.com/streamservice/v1/media/stream/v%s'
+            % video_id, video_id)
+
+        video_url = 'https://%s' % stream['source']
+
+        event = self._download_json(
+            'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
+            video_id, query={
+                'clientID': 99997,
+                'eventID': video_id,
+                'token': 'asdf',
+            })['event']
+
+        title = event.get('title') or event['mobileTitle']
+        description = event.get('customText')
+        timestamp = int_or_none(event.get('longtime'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'timestamp': timestamp,
+            'url': video_url,
+        }
diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py
new file mode 100644
index 000000000..ccb074cd4
--- /dev/null
+++ b/youtube_dl/extractor/stv.py
@@ -0,0 +1,94 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse
+)
+from ..utils import (
+    extract_attributes,
+    float_or_none,
+    int_or_none,
+    str_or_none,
+)
+
+
+class STVPlayerIE(InfoExtractor):
+    IE_NAME = 'stv:player'
+    _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
+    _TEST = {
+        'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/',
+        'md5': '2ad867d4afd641fa14187596e0fbc91b',
+        'info_dict': {
+            'id': '6016487034001',
+            'ext': 'mp4',
+            'upload_date': '20190321',
+            'title': 'Interview with the cast ahead of new Victoria',
+            'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.',
+            'timestamp': 1553179628,
+            'uploader_id': '1486976045',
+        },
+        'skip': 'this resource is unavailable outside of the UK',
+    }
+    _PUBLISHER_ID = '1486976045'
+    _PTYPE_MAP = {
+        'episode': 'episodes',
+        'video': 'shortform',
+    }
+
+    def _real_extract(self, url):
+        ptype, video_id = re.match(self._VALID_URL, url).groups()
+        webpage = self._download_webpage(url, video_id)
+
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex(
+            r'itemprop="embedURL"[^>]+href="([^"]+)',
+            webpage, 'embed URL', default=None)).query)
+        publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID
+
+        player_attr = extract_attributes(self._search_regex(
+            r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {}
+
+        info = {}
+        duration = ref_id = series = video_id = None
+        api_ref_id = player_attr.get('data-player-api-refid')
+        if api_ref_id:
+            resp = self._download_json(
+                'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id),
+                api_ref_id, fatal=False)
+            if resp:
+                result = resp.get('results') or {}
+                video = result.get('video') or {}
+                video_id = str_or_none(video.get('id'))
+                ref_id = video.get('guid')
+                duration = video.get('length')
+                programme = result.get('programme') or {}
+                series = programme.get('name') or programme.get('shortName')
+                subtitles = {}
+                _subtitles = result.get('_subtitles') or {}
+                for ext, sub_url in _subtitles.items():
+                    subtitles.setdefault('en', []).append({
+                        'ext': 'vtt' if ext == 'webvtt' else ext,
+                        'url': sub_url,
+                    })
+                info.update({
+                    'description': result.get('summary'),
+                    'subtitles': subtitles,
+                    'view_count': int_or_none(result.get('views')),
+                })
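+        # no usable API response: fall back to ids scraped from the page markup
+        # (or to a Brightcove ref: id taken from the player attributes)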
+        if not video_id:
+            video_id = qs.get('videoId', [None])[0] or self._search_regex(
+                r'<link\s+itemprop="url"\s+href="(\d+)"',
+                webpage, 'video id', default=None) or 'ref:' + (ref_id or player_attr['data-refid'])
+
+        info.update({
+            '_type': 'url_transparent',
+            'duration': float_or_none(duration or player_attr.get('data-duration'), 1000),
+            'id': video_id,
+            'ie_key': 'BrightcoveNew',
+            'series': series or player_attr.get('data-programme-name'),
+            'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id),
+        })
+        return info
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
new file mode 100644
index 000000000..68051169b
--- /dev/null
+++ b/youtube_dl/extractor/sunporno.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+    qualities,
+    determine_ext,
+)
+
+
+class SunPornoIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.sunporno.com/videos/807778/',
+        'md5': '507887e29033502f29dba69affeebfc9',
+        'info_dict': {
+            'id': '807778',
+            'ext': 'mp4',
+            'title': 'md5:0a400058e8105d39e35c35e7c5184164',
+            'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'duration': 302,
+            'age_limit': 18,
+        }
+    }, {
+        'url': 'http://embeds.sunporno.com/embed/807778',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(
+            'http://www.sunporno.com/videos/%s' % video_id, video_id)
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)', webpage, 'title')
+        description = self._html_search_meta(
+            'description', webpage, 'description')
+        thumbnail = self._html_search_regex(
+            r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
+
+        duration = parse_duration(self._search_regex(
+            (r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
+             r'>Duration:\s*<span[^>]+>\s*(\d+:\d+)\s*<'),
+            webpage, 'duration', fatal=False))
+
+        view_count = int_or_none(self._html_search_regex(
+            r'class="views">(?: