[aljazeera] improve _VALID_URL with test

This commit is contained in:
Luca Steeb 2017-05-11 15:07:10 +02:00
commit 8a70267df5
39 changed files with 1400 additions and 683 deletions

View File

@ -6,8 +6,8 @@
---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.01*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.01**
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2017.05.09*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2017.05.09**
### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2017.05.01
[debug] youtube-dl version 2017.05.09
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}

View File

@ -1,7 +1,55 @@
version <unreleased>
version 2017.05.09
Core
* [YoutubeDL] Force --restrict-filenames when no locale is set on all python
versions (#13027)
Extractors
* [francetv] Adapt to site redesign (#13034)
+ [packtpub] Add support for authentication (#12622)
* [drtv] Lower preference for SignLanguage formats (#13013, #13016)
+ [cspan] Add support for brightcove live embeds (#13028)
* [vrv] Extract DASH formats and subtitles
* [funimation] Fix authentication (#13021)
* [adultswim] Fix extraction (#8640, #10950, #11042, #12121)
+ Add support for Adobe Pass authentication
+ Add support for live streams
+ Add support for show pages
* [turner] Extract thumbnail, is_live and strip description
+ [nonktube] Add support for nonktube.com (#8647, #13024)
+ [nuevo] Pass headers to _extract_nuevo
* [nbc] Improve extraction (#12364)
version 2017.05.07
Common
* [extractor/common] Fix typo in _extract_akamai_formats
+ [postprocessor/ffmpeg] Embed chapters into media file with --add-metadata
+ [extractor/common] Introduce chapters meta field
Extractors
* [youtube] Fix authentication (#12820, #12927, #12973, #12992, #12993, #12995,
#13003)
* [bilibili] Fix video downloading (#13001)
* [rmcdecouverte] Fix extraction (#12937)
* [theplatform] Extract chapters
* [bandcamp] Fix thumbnail extraction (#12980)
* [pornhub] Extend URL regular expression (#12996)
+ [youtube] Extract chapters
+ [nrk] Extract chapters
+ [vice] Add support for ooyala embeds in article pages
+ [vice] Support vice articles (#12968)
* [vice] Fix extraction for non en_us videos (#12967)
* [gdcvault] Fix extraction for some videos (#12733)
* [pbs] Improve multipart video support (#12981)
* [laola1tv] Fix extraction (#12880)
+ [cda] Support birthday verification (#12789)
* [leeco] Fix extraction (#12974)
+ [pbs] Extract chapters
* [amp] Imporove thumbnail and subtitles extraction
* [foxsports] Fix extraction (#12945)
- [coub] Remove comment count extraction (#12941)
version 2017.05.01

View File

@ -281,7 +281,8 @@
- **france2.fr:generation-quoi**
- **FranceCulture**
- **FranceInter**
- **francetv**: France 2, 3, 4, 5 and Ô
- **FranceTV**
- **FranceTVEmbed**
- **francetvinfo.fr**
- **Freesound**
- **freespeech.org**
@ -530,6 +531,7 @@
- **NJPWWorld**: 新日本プロレスワールド
- **NobelPrize**
- **Noco**
- **NonkTube**
- **Noovo**
- **Normalboots**
- **NosVideo**
@ -602,7 +604,6 @@
- **pluralsight**
- **pluralsight:course**
- **plus.google**: Google Plus
- **pluzz.francetv.fr**
- **podomatic**
- **Pokemon**
- **PolskieRadio**
@ -879,9 +880,10 @@
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
- **vh1.com**
- **Viafree**
- **Vice**
- **vice**
- **vice:article**
- **vice:show**
- **Viceland**
- **ViceShow**
- **Vidbit**
- **Viddler**
- **Videa**

View File

@ -44,6 +44,7 @@ from youtube_dl.utils import (
limit_length,
mimetype2ext,
month_by_name,
multipart_encode,
ohdave_rsa_encrypt,
OnDemandPagedList,
orderedSet,
@ -620,6 +621,16 @@ class TestUtil(unittest.TestCase):
'http://example.com/path', {'test': '第二行тест'})),
query_dict('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))
def test_multipart_encode(self):
self.assertEqual(
multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
self.assertEqual(
multipart_encode({'欄位'.encode('utf-8'): ''.encode('utf-8')}, boundary='AAAAAA')[0],
b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
self.assertRaises(
ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
def test_dict_get(self):
FALSE_VALUES = {
'none': None,

View File

@ -0,0 +1,268 @@
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import expect_value
from youtube_dl.extractor import YoutubeIE
class TestYoutubeChapters(unittest.TestCase):
_TEST_CASES = [
(
# https://www.youtube.com/watch?v=A22oy8dFjqc
# pattern: 00:00 - <title>
'''This is the absolute ULTIMATE experience of Queen's set at LIVE AID, this is the best video mixed to the absolutely superior stereo radio broadcast. This vastly superior audio mix takes a huge dump on all of the official mixes. Best viewed in 1080p. ENJOY! ***MAKE SURE TO READ THE DESCRIPTION***<br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+36);return false;">00:36</a> - Bohemian Rhapsody<br /><a href="#" onclick="yt.www.watch.player.seekTo(02*60+42);return false;">02:42</a> - Radio Ga Ga<br /><a href="#" onclick="yt.www.watch.player.seekTo(06*60+53);return false;">06:53</a> - Ay Oh!<br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+34);return false;">07:34</a> - Hammer To Fall<br /><a href="#" onclick="yt.www.watch.player.seekTo(12*60+08);return false;">12:08</a> - Crazy Little Thing Called Love<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+03);return false;">16:03</a> - We Will Rock You<br /><a href="#" onclick="yt.www.watch.player.seekTo(17*60+18);return false;">17:18</a> - We Are The Champions<br /><a href="#" onclick="yt.www.watch.player.seekTo(21*60+12);return false;">21:12</a> - Is This The World We Created...?<br /><br />Short song analysis:<br /><br />- "Bohemian Rhapsody": Although it's a short medley version, it's one of the best performances of the ballad section, with Freddie nailing the Bb4s with the correct studio phrasing (for the first time ever!).<br /><br />- "Radio Ga Ga": Although it's missing one chorus, this is one of - if not the best - the best versions ever, Freddie nails all the Bb4s and sounds very clean! Spike Edney's Roland Jupiter 8 also really shines through on this mix, compared to the DVD releases!<br /><br />- "Audience Improv": A great improv, Freddie sounds strong and confident. You gotta love when he sustains that A4 for 4 seconds!<br /><br />- "Hammer To Fall": Despite missing a verse and a chorus, it's a strong version (possibly the best ever). Freddie sings the song amazingly, and even ad-libs a C#5 and a C5! Also notice how heavy Brian's guitar sounds compared to the thin DVD mixes - it roars!<br /><br />- "Crazy Little Thing Called Love": A great version, the crowd loves the song, the jam is great as well! Only downside to this is the slight feedback issues.<br /><br />- "We Will Rock You": Although cut down to the 1st verse and chorus, Freddie sounds strong. He nails the A4, and the solo from Dr. May is brilliant!<br /><br />- "We Are the Champions": Perhaps the high-light of the performance - Freddie is very daring on this version, he sustains the pre-chorus Bb4s, nails the 1st C5, belts great A4s, but most importantly: He nails the chorus Bb4s, in all 3 choruses! This is the only time he has ever done so! It has to be said though, the last one sounds a bit rough, but that's a side effect of belting high notes for the past 18 minutes, with nodules AND laryngitis!<br /><br />- "Is This The World We Created... ?": Freddie and Brian perform a beautiful version of this, and it is one of the best versions ever. It's both sad and hilarious that a couple of BBC engineers are talking over the song, one of them being completely oblivious of the fact that he is interrupting the performance, on live television... Which was being televised to almost 2 billion homes.<br /><br /><br />All rights go to their respective owners!<br />-----Copyright Disclaimer Under Section 107 of the Copyright Act 1976, allowance is made for fair use for purposes such as criticism, comment, news reporting, teaching, scholarship, and research. Fair use is a use permitted by copyright statute that might otherwise be infringing. Non-profit, educational or personal use tips the balance in favor of fair use''',
1477,
[{
'start_time': 36,
'end_time': 162,
'title': 'Bohemian Rhapsody',
}, {
'start_time': 162,
'end_time': 413,
'title': 'Radio Ga Ga',
}, {
'start_time': 413,
'end_time': 454,
'title': 'Ay Oh!',
}, {
'start_time': 454,
'end_time': 728,
'title': 'Hammer To Fall',
}, {
'start_time': 728,
'end_time': 963,
'title': 'Crazy Little Thing Called Love',
}, {
'start_time': 963,
'end_time': 1038,
'title': 'We Will Rock You',
}, {
'start_time': 1038,
'end_time': 1272,
'title': 'We Are The Champions',
}, {
'start_time': 1272,
'end_time': 1477,
'title': 'Is This The World We Created...?',
}]
),
(
# https://www.youtube.com/watch?v=ekYlRhALiRQ
# pattern: <num>. <title> 0:00
'1. Those Beaten Paths of Confusion <a href="#" onclick="yt.www.watch.player.seekTo(0*60+00);return false;">0:00</a><br />2. Beyond the Shadows of Emptiness & Nothingness <a href="#" onclick="yt.www.watch.player.seekTo(11*60+47);return false;">11:47</a><br />3. Poison Yourself...With Thought <a href="#" onclick="yt.www.watch.player.seekTo(26*60+30);return false;">26:30</a><br />4. The Agents of Transformation <a href="#" onclick="yt.www.watch.player.seekTo(35*60+57);return false;">35:57</a><br />5. Drowning in the Pain of Consciousness <a href="#" onclick="yt.www.watch.player.seekTo(44*60+32);return false;">44:32</a><br />6. Deny the Disease of Life <a href="#" onclick="yt.www.watch.player.seekTo(53*60+07);return false;">53:07</a><br /><br />More info/Buy: http://crepusculonegro.storenvy.com/products/257645-cn-03-arizmenda-within-the-vacuum-of-infinity<br /><br />No copyright is intended. The rights to this video are assumed by the owner and its affiliates.',
4009,
[{
'start_time': 0,
'end_time': 707,
'title': '1. Those Beaten Paths of Confusion',
}, {
'start_time': 707,
'end_time': 1590,
'title': '2. Beyond the Shadows of Emptiness & Nothingness',
}, {
'start_time': 1590,
'end_time': 2157,
'title': '3. Poison Yourself...With Thought',
}, {
'start_time': 2157,
'end_time': 2672,
'title': '4. The Agents of Transformation',
}, {
'start_time': 2672,
'end_time': 3187,
'title': '5. Drowning in the Pain of Consciousness',
}, {
'start_time': 3187,
'end_time': 4009,
'title': '6. Deny the Disease of Life',
}]
),
(
# https://www.youtube.com/watch?v=WjL4pSzog9w
# pattern: 00:00 <title>
'<a href="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" class="yt-uix-servicelink " data-target-new-window="True" data-servicelink="CDAQ6TgYACITCNf1raqT2dMCFdRjGAod_o0CBSj4HQ" data-url="https://arizmenda.bandcamp.com/merch/despairs-depths-descended-cd" rel="nofollow noopener" target="_blank">https://arizmenda.bandcamp.com/merch/...</a><br /><br /><a href="#" onclick="yt.www.watch.player.seekTo(00*60+00);return false;">00:00</a> Christening Unborn Deformities <br /><a href="#" onclick="yt.www.watch.player.seekTo(07*60+08);return false;">07:08</a> Taste of Purity<br /><a href="#" onclick="yt.www.watch.player.seekTo(16*60+16);return false;">16:16</a> Sculpting Sins of a Universal Tongue<br /><a href="#" onclick="yt.www.watch.player.seekTo(24*60+45);return false;">24:45</a> Birth<br /><a href="#" onclick="yt.www.watch.player.seekTo(31*60+24);return false;">31:24</a> Neves<br /><a href="#" onclick="yt.www.watch.player.seekTo(37*60+55);return false;">37:55</a> Libations in Limbo',
2705,
[{
'start_time': 0,
'end_time': 428,
'title': 'Christening Unborn Deformities',
}, {
'start_time': 428,
'end_time': 976,
'title': 'Taste of Purity',
}, {
'start_time': 976,
'end_time': 1485,
'title': 'Sculpting Sins of a Universal Tongue',
}, {
'start_time': 1485,
'end_time': 1884,
'title': 'Birth',
}, {
'start_time': 1884,
'end_time': 2275,
'title': 'Neves',
}, {
'start_time': 2275,
'end_time': 2705,
'title': 'Libations in Limbo',
}]
),
(
# https://www.youtube.com/watch?v=o3r1sn-t3is
# pattern: <title> 00:00 <note>
'Download this show in MP3: <a href="http://sh.st/njZKK" class="yt-uix-servicelink " data-url="http://sh.st/njZKK" data-target-new-window="True" data-servicelink="CDAQ6TgYACITCK3j8_6o2dMCFVDCGAoduVAKKij4HQ" rel="nofollow noopener" target="_blank">http://sh.st/njZKK</a><br /><br />Setlist:<br />I-E-A-I-A-I-O <a href="#" onclick="yt.www.watch.player.seekTo(00*60+45);return false;">00:45</a><br />Suite-Pee <a href="#" onclick="yt.www.watch.player.seekTo(4*60+26);return false;">4:26</a> (Incomplete)<br />Attack <a href="#" onclick="yt.www.watch.player.seekTo(5*60+31);return false;">5:31</a> (First live performance since 2011)<br />Prison Song <a href="#" onclick="yt.www.watch.player.seekTo(8*60+42);return false;">8:42</a><br />Know <a href="#" onclick="yt.www.watch.player.seekTo(12*60+32);return false;">12:32</a> (First live performance since 2011)<br />Aerials <a href="#" onclick="yt.www.watch.player.seekTo(15*60+32);return false;">15:32</a><br />Soldier Side - Intro <a href="#" onclick="yt.www.watch.player.seekTo(19*60+13);return false;">19:13</a><br />B.Y.O.B. <a href="#" onclick="yt.www.watch.player.seekTo(20*60+09);return false;">20:09</a><br />Soil <a href="#" onclick="yt.www.watch.player.seekTo(24*60+32);return false;">24:32</a><br />Darts <a href="#" onclick="yt.www.watch.player.seekTo(27*60+48);return false;">27:48</a><br />Radio/Video <a href="#" onclick="yt.www.watch.player.seekTo(30*60+38);return false;">30:38</a><br />Hypnotize <a href="#" onclick="yt.www.watch.player.seekTo(35*60+05);return false;">35:05</a><br />Temper <a href="#" onclick="yt.www.watch.player.seekTo(38*60+08);return false;">38:08</a> (First live performance since 1999)<br />CUBErt <a href="#" onclick="yt.www.watch.player.seekTo(41*60+00);return false;">41:00</a><br />Needles <a href="#" onclick="yt.www.watch.player.seekTo(42*60+57);return false;">42:57</a><br />Deer Dance <a href="#" onclick="yt.www.watch.player.seekTo(46*60+27);return false;">46:27</a><br />Bounce <a href="#" onclick="yt.www.watch.player.seekTo(49*60+38);return false;">49:38</a><br />Suggestions <a href="#" onclick="yt.www.watch.player.seekTo(51*60+25);return false;">51:25</a><br />Psycho <a href="#" onclick="yt.www.watch.player.seekTo(53*60+52);return false;">53:52</a><br />Chop Suey! <a href="#" onclick="yt.www.watch.player.seekTo(58*60+13);return false;">58:13</a><br />Lonely Day <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+01*60+15);return false;">1:01:15</a><br />Question! <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+04*60+14);return false;">1:04:14</a><br />Lost in Hollywood <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+08*60+10);return false;">1:08:10</a><br />Vicinity of Obscenity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+13*60+40);return false;">1:13:40</a>(First live performance since 2012)<br />Forest <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+16*60+17);return false;">1:16:17</a><br />Cigaro <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+20*60+02);return false;">1:20:02</a><br />Toxicity <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+23*60+57);return false;">1:23:57</a>(with Chino Moreno)<br />Sugar <a href="#" onclick="yt.www.watch.player.seekTo(1*3600+27*60+53);return false;">1:27:53</a>',
5640,
[{
'start_time': 45,
'end_time': 266,
'title': 'I-E-A-I-A-I-O',
}, {
'start_time': 266,
'end_time': 331,
'title': 'Suite-Pee (Incomplete)',
}, {
'start_time': 331,
'end_time': 522,
'title': 'Attack (First live performance since 2011)',
}, {
'start_time': 522,
'end_time': 752,
'title': 'Prison Song',
}, {
'start_time': 752,
'end_time': 932,
'title': 'Know (First live performance since 2011)',
}, {
'start_time': 932,
'end_time': 1153,
'title': 'Aerials',
}, {
'start_time': 1153,
'end_time': 1209,
'title': 'Soldier Side - Intro',
}, {
'start_time': 1209,
'end_time': 1472,
'title': 'B.Y.O.B.',
}, {
'start_time': 1472,
'end_time': 1668,
'title': 'Soil',
}, {
'start_time': 1668,
'end_time': 1838,
'title': 'Darts',
}, {
'start_time': 1838,
'end_time': 2105,
'title': 'Radio/Video',
}, {
'start_time': 2105,
'end_time': 2288,
'title': 'Hypnotize',
}, {
'start_time': 2288,
'end_time': 2460,
'title': 'Temper (First live performance since 1999)',
}, {
'start_time': 2460,
'end_time': 2577,
'title': 'CUBErt',
}, {
'start_time': 2577,
'end_time': 2787,
'title': 'Needles',
}, {
'start_time': 2787,
'end_time': 2978,
'title': 'Deer Dance',
}, {
'start_time': 2978,
'end_time': 3085,
'title': 'Bounce',
}, {
'start_time': 3085,
'end_time': 3232,
'title': 'Suggestions',
}, {
'start_time': 3232,
'end_time': 3493,
'title': 'Psycho',
}, {
'start_time': 3493,
'end_time': 3675,
'title': 'Chop Suey!',
}, {
'start_time': 3675,
'end_time': 3854,
'title': 'Lonely Day',
}, {
'start_time': 3854,
'end_time': 4090,
'title': 'Question!',
}, {
'start_time': 4090,
'end_time': 4420,
'title': 'Lost in Hollywood',
}, {
'start_time': 4420,
'end_time': 4577,
'title': 'Vicinity of Obscenity (First live performance since 2012)',
}, {
'start_time': 4577,
'end_time': 4802,
'title': 'Forest',
}, {
'start_time': 4802,
'end_time': 5037,
'title': 'Cigaro',
}, {
'start_time': 5037,
'end_time': 5273,
'title': 'Toxicity (with Chino Moreno)',
}, {
'start_time': 5273,
'end_time': 5640,
'title': 'Sugar',
}]
),
(
# https://www.youtube.com/watch?v=PkYLQbsqCE8
# pattern: <num> - <title> [<latinized title>] 0:00:00
'''Затемно (Zatemno) is an Obscure Black Metal Band from Russia.<br /><br />"Во прах (Vo prakh)'' Into The Ashes", Debut mini-album released may 6, 2016, by Death Knell Productions<br />Released on 6 panel digipak CD, limited to 100 copies only<br />And digital format on Bandcamp<br /><br />Tracklist<br /><br />1 - Во прах [Vo prakh] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;">0:00:00</a><br />2 - Искупление [Iskupleniye] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+08*60+10);return false;">0:08:10</a><br />3 - Из серпов луны...[Iz serpov luny] <a href="#" onclick="yt.www.watch.player.seekTo(0*3600+14*60+30);return false;">0:14:30</a><br /><br />Links:<br /><a href="https://deathknellprod.bandcamp.com/album/--2" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://deathknellprod.bandcamp.com/album/--2" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://deathknellprod.bandcamp.com/a...</a><br /><a href="https://www.facebook.com/DeathKnellProd/" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://www.facebook.com/DeathKnellProd/" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://www.facebook.com/DeathKnellProd/</a><br /><br /><br />I don't have any right about this artifact, my only intention is to spread the music of the band, all rights are reserved to the Затемно (Zatemno) and his producers, Death Knell Productions.<br /><br />------------------------------------------------------------------<br /><br />Subscribe for more videos like this.<br />My link: <a href="https://web.facebook.com/AttackOfTheDragons" class="yt-uix-servicelink " data-target-new-window="True" data-url="https://web.facebook.com/AttackOfTheDragons" data-servicelink="CC8Q6TgYACITCNP234Kr2dMCFcNxGAodQqsIwSj4HQ" target="_blank" rel="nofollow noopener">https://web.facebook.com/AttackOfTheD...</a>''',
1138,
[{
'start_time': 0,
'end_time': 490,
'title': '1 - Во прах [Vo prakh]',
}, {
'start_time': 490,
'end_time': 870,
'title': '2 - Искупление [Iskupleniye]',
}, {
'start_time': 870,
'end_time': 1138,
'title': '3 - Из серпов луны...[Iz serpov luny]',
}]
),
]
def test_youtube_chapters(self):
for description, duration, expected_chapters in self._TEST_CASES:
ie = YoutubeIE()
expect_value(
self, ie._extract_chapters(description, duration),
expected_chapters, None)
if __name__ == '__main__':
unittest.main()

View File

@ -370,10 +370,10 @@ class YoutubeDL(object):
else:
raise
if (sys.version_info >= (3,) and sys.platform != 'win32' and
if (sys.platform != 'win32' and
sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
not params.get('restrictfilenames', False)):
# On Python 3, the Unicode filesystem API will throw errors (#1474)
# Unicode filesystem API will throw errors (#1474, #13027)
self.report_warning(
'Assuming --restrict-filenames since file system encoding '
'cannot encode all characters. '

View File

@ -5,91 +5,52 @@ import re
from .turner import TurnerBaseIE
from ..utils import (
ExtractorError,
int_or_none,
strip_or_none,
)
class AdultSwimIE(TurnerBaseIE):
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?'
_VALID_URL = r'https?://(?:www\.)?adultswim\.com/videos/(?P<show_path>[^/?#]+)(?:/(?P<episode_path>[^/?#]+))?'
_TESTS = [{
'url': 'http://adultswim.com/videos/rick-and-morty/pilot',
'playlist': [
{
'md5': '247572debc75c7652f253c8daa51a14d',
'info_dict': {
'id': 'rQxZvXQ4ROaSOqq-or2Mow-0',
'ext': 'flv',
'title': 'Rick and Morty - Pilot Part 1',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
},
{
'md5': '77b0e037a4b20ec6b98671c4c379f48d',
'info_dict': {
'id': 'rQxZvXQ4ROaSOqq-or2Mow-3',
'ext': 'flv',
'title': 'Rick and Morty - Pilot Part 4',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
},
],
'info_dict': {
'id': 'rQxZvXQ4ROaSOqq-or2Mow',
'ext': 'mp4',
'title': 'Rick and Morty - Pilot',
'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
},
'skip': 'This video is only available for registered users',
}, {
'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
'playlist': [
{
'md5': '2eb5c06d0f9a1539da3718d897f13ec5',
'info_dict': {
'id': '-t8CamQlQ2aYZ49ItZCFog-0',
'ext': 'flv',
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
}
],
'info_dict': {
'id': '-t8CamQlQ2aYZ49ItZCFog',
'title': 'American Dad - Putting Francine Out of Business',
'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
},
}, {
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
'playlist': [
{
'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
},
}
],
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
'description': 'Rick moves in with his daughter\'s family and establishes himself as a bad influence on his grandson, Morty.',
'timestamp': 1493267400,
'upload_date': '20170427',
},
'params': {
# m3u8 download
'skip_download': True,
}
},
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
'info_dict': {
'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
'ext': 'mp4',
'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
'description': 'Dr. Brule reports live from Wine Country with a special report on wines. \nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.',
'upload_date': '20080124',
'timestamp': 1201150800,
},
'params': {
# m3u8 download
'skip_download': True,
},
}, {
# heroMetadata.trailer
'url': 'http://www.adultswim.com/videos/decker/inside-decker-a-new-hero/',
'info_dict': {
'id': 'I0LQFQkaSUaFp8PnAWHhoQ',
'ext': 'mp4',
'title': 'Decker - Inside Decker: A New Hero',
'description': 'md5:c916df071d425d62d70c86d4399d3ee0',
'duration': 249.008,
'description': 'The guys recap the conclusion of the season. They announce a new hero, take a peek into the Victorville Film Archive and welcome back the talented James Dean.',
'timestamp': 1469480460,
'upload_date': '20160725',
},
'params': {
# m3u8 download
@ -97,136 +58,102 @@ class AdultSwimIE(TurnerBaseIE):
},
'expected_warnings': ['Unable to download f4m manifest'],
}, {
'url': 'http://www.adultswim.com/videos/toonami/friday-october-14th-2016/',
'url': 'http://www.adultswim.com/videos/attack-on-titan',
'info_dict': {
'id': 'eYiLsKVgQ6qTC6agD67Sig',
'title': 'Toonami - Friday, October 14th, 2016',
'description': 'md5:99892c96ffc85e159a428de85c30acde',
'id': 'b7A69dzfRzuaXIECdxW8XQ',
'title': 'Attack on Titan',
'description': 'md5:6c8e003ea0777b47013e894767f5e114',
},
'playlist_mincount': 12,
}, {
'url': 'http://www.adultswim.com/videos/streams/williams-stream',
'info_dict': {
'id': 'd8DEBj7QRfetLsRgFnGEyg',
'ext': 'mp4',
'title': r're:^Williams Stream \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'description': 'original programming',
},
'playlist': [{
'md5': '',
'info_dict': {
'id': 'eYiLsKVgQ6qTC6agD67Sig',
'ext': 'mp4',
'title': 'Toonami - Friday, October 14th, 2016',
'description': 'md5:99892c96ffc85e159a428de85c30acde',
},
}],
'params': {
# m3u8 download
'skip_download': True,
},
'expected_warnings': ['Unable to download f4m manifest'],
}]
@staticmethod
def find_video_info(collection, slug):
for video in collection.get('videos'):
if video.get('slug') == slug:
return video
@staticmethod
def find_collection_by_linkURL(collections, linkURL):
for collection in collections:
if collection.get('linkURL') == linkURL:
return collection
@staticmethod
def find_collection_containing_video(collections, slug):
for collection in collections:
for video in collection.get('videos'):
if video.get('slug') == slug:
return collection, video
return None, None
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
show_path = mobj.group('show_path')
episode_path = mobj.group('episode_path')
is_playlist = True if mobj.group('is_playlist') else False
show_path, episode_path = re.match(self._VALID_URL, url).groups()
display_id = episode_path or show_path
webpage = self._download_webpage(url, display_id)
initial_data = self._parse_json(self._search_regex(
r'AS_INITIAL_DATA(?:__)?\s*=\s*({.+?});',
webpage, 'initial data'), display_id)
webpage = self._download_webpage(url, episode_path)
is_stream = show_path == 'streams'
if is_stream:
if not episode_path:
episode_path = 'live-stream'
# Extract the value of `bootstrappedData` from the Javascript in the page.
bootstrapped_data = self._parse_json(self._search_regex(
r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
video_data = next(stream for stream_path, stream in initial_data['streams'].items() if stream_path == episode_path)
video_id = video_data.get('stream')
# Downloading videos from a /videos/playlist/ URL needs to be handled differently.
# NOTE: We are only downloading one video (the current one) not the playlist
if is_playlist:
collections = bootstrapped_data['playlists']['collections']
collection = self.find_collection_by_linkURL(collections, show_path)
video_info = self.find_video_info(collection, episode_path)
show_title = video_info['showTitle']
segment_ids = [video_info['videoPlaybackID']]
if not video_id:
entries = []
for episode in video_data.get('archiveEpisodes', []):
episode_url = episode.get('url')
if not episode_url:
continue
entries.append(self.url_result(
episode_url, 'AdultSwim', episode.get('id')))
return self.playlist_result(
entries, video_data.get('id'), video_data.get('title'),
strip_or_none(video_data.get('description')))
else:
collections = bootstrapped_data['show']['collections']
collection, video_info = self.find_collection_containing_video(collections, episode_path)
# Video wasn't found in the collections, let's try `slugged_video`.
if video_info is None:
if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
video_info = bootstrapped_data['slugged_video']
if not video_info:
video_info = bootstrapped_data.get(
'heroMetadata', {}).get('trailer', {}).get('video')
if not video_info:
video_info = bootstrapped_data.get('onlineOriginals', [None])[0]
if not video_info:
raise ExtractorError('Unable to find video info')
show_data = initial_data['show']
show = bootstrapped_data['show']
show_title = show['title']
stream = video_info.get('stream')
if stream and stream.get('videoPlaybackID'):
segment_ids = [stream['videoPlaybackID']]
elif video_info.get('clips'):
segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
elif video_info.get('videoPlaybackID'):
segment_ids = [video_info['videoPlaybackID']]
elif video_info.get('id'):
segment_ids = [video_info['id']]
else:
if video_info.get('auth') is True:
raise ExtractorError(
'This video is only available via cable service provider subscription that'
' is not currently supported. You may want to use --cookies.', expected=True)
else:
raise ExtractorError('Unable to find stream or clips')
if not episode_path:
entries = []
for video in show_data.get('videos', []):
slug = video.get('slug')
if not slug:
continue
entries.append(self.url_result(
'http://adultswim.com/videos/%s/%s' % (show_path, slug),
'AdultSwim', video.get('id')))
return self.playlist_result(
entries, show_data.get('id'), show_data.get('title'),
strip_or_none(show_data.get('metadata', {}).get('description')))
episode_id = video_info['id']
episode_title = video_info['title']
episode_description = video_info.get('description')
episode_duration = int_or_none(video_info.get('duration'))
view_count = int_or_none(video_info.get('views'))
video_data = show_data['sluggedVideo']
video_id = video_data['id']
entries = []
for part_num, segment_id in enumerate(segment_ids):
segement_info = self._extract_cvp_info(
'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id,
segment_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
},
})
segment_title = '%s - %s' % (show_title, episode_title)
if len(segment_ids) > 1:
segment_title += ' Part %d' % (part_num + 1)
segement_info.update({
'id': segment_id,
'title': segment_title,
'description': episode_description,
info = self._extract_cvp_info(
'http://www.adultswim.com/videos/api/v0/assets?platform=desktop&id=' + video_id,
video_id, {
'secure': {
'media_src': 'http://androidhls-secure.cdn.turner.com/adultswim/big',
'tokenizer_src': 'http://www.adultswim.com/astv/mvpd/processors/services/token_ipadAdobe.do',
},
}, {
'url': url,
'site_name': 'AdultSwim',
'auth_required': video_data.get('auth'),
})
entries.append(segement_info)
return {
'_type': 'playlist',
'id': episode_id,
'display_id': episode_path,
'entries': entries,
'title': '%s - %s' % (show_title, episode_title),
'description': episode_description,
'duration': episode_duration,
'view_count': view_count,
}
info.update({
'id': video_id,
'display_id': display_id,
'description': info.get('description') or strip_or_none(video_data.get('description')),
})
if not is_stream:
info.update({
'duration': info.get('duration') or int_or_none(video_data.get('duration')),
'timestamp': info.get('timestamp') or int_or_none(video_data.get('launch_date')),
'season_number': info.get('season_number') or int_or_none(video_data.get('season_number')),
'episode': info['title'],
'episode_number': info.get('episode_number') or int_or_none(video_data.get('episode_number')),
})
info['series'] = video_data.get('collection_title') or info.get('series')
if info['series'] and info['series'] != info['title']:
info['title'] = '%s - %s' % (info['series'], info['title'])
return info

View File

@ -4,9 +4,9 @@ from .common import InfoExtractor
class AlJazeeraIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P<id>[^/]+)\.html'
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
_TEST = {
_TESTS = [{
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
'info_dict': {
'id': '3792260579001',
@ -19,7 +19,10 @@ class AlJazeeraIE(InfoExtractor):
},
'add_ie': ['BrightcoveNew'],
'skip': 'Not accessible from Travis CI server',
}
}, {
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
def _real_extract(self, url):

View File

@ -145,7 +145,7 @@ class BandcampIE(InfoExtractor):
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'thumbnail': info.get('thumb_url') or thumbnail,
'uploader': info.get('artist'),
'artist': artist,
'track': track,

View File

@ -122,6 +122,11 @@ class BiliBiliIE(InfoExtractor):
'preference': -2 if 'hd.mp4' in backup_url else -3,
})
for a_format in formats:
a_format.setdefault('http_headers', {}).update({
'Referer': url,
})
self._sort_formats(formats)
entries.append({

View File

@ -5,6 +5,7 @@ import re
import json
from .common import InfoExtractor
from .adobepass import AdobePassIE
from ..compat import (
compat_etree_fromstring,
compat_parse_qs,
@ -448,7 +449,7 @@ class BrightcoveLegacyIE(InfoExtractor):
return info
class BrightcoveNewIE(InfoExtractor):
class BrightcoveNewIE(AdobePassIE):
IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)'
_TESTS = [{
@ -602,6 +603,20 @@ class BrightcoveNewIE(InfoExtractor):
raise ExtractorError(message, expected=True)
raise
errors = json_data.get('errors')
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
custom_fields = json_data['custom_fields']
tve_token = self._extract_mvpd_auth(
smuggled_data['source_url'], video_id,
custom_fields['bcadobepassrequestorid'],
custom_fields['bcadobepassresourceid'])
json_data = self._download_json(
api_url, video_id, headers={
'Accept': 'application/json;pk=%s' % policy_key
}, query={
'tveToken': tve_token,
})
title = json_data['name'].strip()
formats = []
@ -667,7 +682,6 @@ class BrightcoveNewIE(InfoExtractor):
})
formats.append(f)
errors = json_data.get('errors')
if not formats and errors:
error = errors[0]
raise ExtractorError(
@ -684,7 +698,7 @@ class BrightcoveNewIE(InfoExtractor):
is_live = False
duration = float_or_none(json_data.get('duration'), 1000)
if duration and duration < 0:
if duration is not None and duration <= 0:
is_live = True
return {

View File

@ -9,7 +9,10 @@ from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
multipart_encode,
parse_duration,
random_birthday,
urljoin,
)
@ -27,7 +30,8 @@ class CDAIE(InfoExtractor):
'description': 'md5:269ccd135d550da90d1662651fcb9772',
'thumbnail': r're:^https?://.*\.jpg$',
'average_rating': float,
'duration': 39
'duration': 39,
'age_limit': 0,
}
}, {
'url': 'http://www.cda.pl/video/57413289',
@ -41,13 +45,41 @@ class CDAIE(InfoExtractor):
'uploader': 'crash404',
'view_count': int,
'average_rating': float,
'duration': 137
'duration': 137,
'age_limit': 0,
}
}, {
# Age-restricted
'url': 'http://www.cda.pl/video/1273454c4',
'info_dict': {
'id': '1273454c4',
'ext': 'mp4',
'title': 'Bronson (2008) napisy HD 1080p',
'description': 'md5:1b6cb18508daf2dc4e0fa4db77fec24c',
'height': 1080,
'uploader': 'boniek61',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 5554,
'age_limit': 18,
'view_count': int,
'average_rating': float,
},
}, {
'url': 'http://ebd.cda.pl/0x0/5749950c',
'only_matching': True,
}]
def _download_age_confirm_page(self, url, video_id, *args, **kwargs):
form_data = random_birthday('rok', 'miesiac', 'dzien')
form_data.update({'return': url, 'module': 'video', 'module_id': video_id})
data, content_type = multipart_encode(form_data)
return self._download_webpage(
urljoin(url, '/a/validatebirth'), video_id, *args,
data=data, headers={
'Referer': url,
'Content-Type': content_type,
}, **kwargs)
def _real_extract(self, url):
video_id = self._match_id(url)
self._set_cookie('cda.pl', 'cda.player', 'html5')
@ -57,6 +89,13 @@ class CDAIE(InfoExtractor):
if 'Ten film jest dostępny dla użytkowników premium' in webpage:
raise ExtractorError('This video is only available for premium users.', expected=True)
need_confirm_age = False
if self._html_search_regex(r'(<form[^>]+action="/a/validatebirth")',
webpage, 'birthday validate form', default=None):
webpage = self._download_age_confirm_page(
url, video_id, note='Confirming age')
need_confirm_age = True
formats = []
uploader = self._search_regex(r'''(?x)
@ -81,6 +120,7 @@ class CDAIE(InfoExtractor):
'thumbnail': self._og_search_thumbnail(webpage),
'formats': formats,
'duration': None,
'age_limit': 18 if need_confirm_age else 0,
}
def extract_format(page, version):
@ -121,7 +161,12 @@ class CDAIE(InfoExtractor):
for href, resolution in re.findall(
r'<a[^>]+data-quality="[^"]+"[^>]+href="([^"]+)"[^>]+class="quality-btn"[^>]*>([0-9]+p)',
webpage):
webpage = self._download_webpage(
if need_confirm_age:
handler = self._download_age_confirm_page
else:
handler = self._download_webpage
webpage = handler(
self._BASE_URL + href, video_id,
'Downloading %s version information' % resolution, fatal=False)
if not webpage:
@ -129,6 +174,7 @@ class CDAIE(InfoExtractor):
# invalid version is requested.
self.report_warning('Unable to download %s version information' % resolution)
continue
extract_format(webpage, resolution)
self._sort_formats(formats)

View File

@ -2174,7 +2174,7 @@ class InfoExtractor(object):
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
formats = []
hdcore_sign = 'hdcore=3.7.0'
f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
hds_host = hosts.get('hds')
if hds_host:
f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)

View File

@ -21,9 +21,10 @@ class CrackleIE(InfoExtractor):
'season_number': 8,
'episode_number': 4,
'subtitles': {
'en-US': [{
'ext': 'ttml',
}]
'en-US': [
{'ext': 'vtt'},
{'ext': 'tt'},
]
},
},
'params': {

View File

@ -10,6 +10,7 @@ from ..utils import (
smuggle_url,
determine_ext,
ExtractorError,
extract_attributes,
)
from .senateisvp import SenateISVPIE
from .ustream import UstreamIE
@ -68,6 +69,7 @@ class CSpanIE(InfoExtractor):
'uploader_id': '12987475',
},
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
def _real_extract(self, url):
video_id = self._match_id(url)
@ -78,6 +80,19 @@ class CSpanIE(InfoExtractor):
if ustream_url:
return self.url_result(ustream_url, UstreamIE.ie_key())
if '&vod' not in url:
bc = self._search_regex(
r"(<[^>]+id='brightcove-player-embed'[^>]+>)",
webpage, 'brightcove embed', default=None)
if bc:
bc_attr = extract_attributes(bc)
bc_url = self.BRIGHTCOVE_URL_TEMPLATE % (
bc_attr.get('data-bcaccountid', '3162030207001'),
bc_attr.get('data-noprebcplayerid', 'SyGGpuJy3g'),
bc_attr.get('data-newbcplayerid', 'default'),
bc_attr['data-bcid'])
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
# We first look for clipid, because clipprog always appears before
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
results = list(filter(None, (re.search(p, webpage) for p in patterns)))

View File

@ -20,7 +20,7 @@ class DRTVIE(InfoExtractor):
IE_NAME = 'drtv'
_TESTS = [{
'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
'md5': '25e659cccc9a2ed956110a299fdf5983',
'md5': '7ae17b4e18eb5d29212f424a7511c184',
'info_dict': {
'id': 'klassen-darlig-taber-10',
'ext': 'mp4',
@ -30,21 +30,37 @@ class DRTVIE(InfoExtractor):
'upload_date': '20160823',
'duration': 606.84,
},
'params': {
'skip_download': True,
},
}, {
# embed
'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
'md5': '2c37175c718155930f939ef59952474a',
'info_dict': {
'id': 'christiania-pusher-street-ryddes-drdkrjpo',
'ext': 'mp4',
'title': 'LIVE Christianias rydning af Pusher Street er i gang',
'description': '- Det er det fedeste, der er sket i 20 år, fortæller christianit til DR Nyheder.',
'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
'timestamp': 1472800279,
'upload_date': '20160902',
'duration': 131.4,
},
'params': {
'skip_download': True,
},
}, {
# with SignLanguage formats
'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
'info_dict': {
'id': 'historien-om-danmark-stenalder',
'ext': 'mp4',
'title': 'Historien om Danmark: Stenalder (1)',
'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
'timestamp': 1490401996,
'upload_date': '20170325',
'duration': 3502.04,
'formats': 'mincount:20',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url):
@ -88,7 +104,7 @@ class DRTVIE(InfoExtractor):
elif kind in ('VideoResource', 'AudioResource'):
duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
restricted_to_denmark = asset.get('RestrictedToDenmark')
spoken_subtitles = asset.get('Target') == 'SpokenSubtitles'
asset_target = asset.get('Target')
for link in asset.get('Links', []):
uri = link.get('Uri')
if not uri:
@ -96,9 +112,9 @@ class DRTVIE(InfoExtractor):
target = link.get('Target')
format_id = target or ''
preference = None
if spoken_subtitles:
if asset_target in ('SpokenSubtitles', 'SignLanguage'):
preference = -1
format_id += '-spoken-subtitles'
format_id += '-%s' % asset_target
if target == 'HDS':
f4m_formats = self._extract_f4m_formats(
uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',

View File

@ -350,9 +350,9 @@ from .foxsports import FoxSportsIE
from .franceculture import FranceCultureIE
from .franceinter import FranceInterIE
from .francetv import (
PluzzIE,
FranceTvInfoIE,
FranceTVIE,
FranceTVEmbedIE,
FranceTVInfoIE,
GenerationQuoiIE,
CultureboxIE,
)
@ -663,6 +663,7 @@ from .nintendo import NintendoIE
from .njpwworld import NJPWWorldIE
from .nobelprize import NobelPrizeIE
from .noco import NocoIE
from .nonktube import NonkTubeIE
from .noovo import NoovoIE
from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE
@ -1125,6 +1126,7 @@ from .vgtv import (
from .vh1 import VH1IE
from .vice import (
ViceIE,
ViceArticleIE,
ViceShowIE,
)
from .viceland import VicelandIE

View File

@ -21,11 +21,13 @@ from .dailymotion import (
class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id, catalogue):
def _extract_video(self, video_id, catalogue=None):
info = self._download_json(
'http://webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=%s&catalogue=%s'
% (video_id, catalogue),
video_id, 'Downloading video JSON')
'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
video_id, 'Downloading video JSON', query={
'idDiffusion': video_id,
'catalogue': catalogue or '',
})
if info.get('status') == 'NOK':
raise ExtractorError(
@ -109,27 +111,94 @@ class FranceTVBaseInfoExtractor(InfoExtractor):
}
class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = 'pluzz.francetv.fr'
_VALID_URL = r'https?://(?:m\.)?pluzz\.francetv\.fr/videos/(?P<id>.+?)\.html'
class FranceTVIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://(?:www\.)?france\.tv/(?:[^/]+/)+(?P<id>[^/]+)\.html'
# Can't use tests, videos expire in 7 days
_TESTS = [{
'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
'info_dict': {
'id': '157550144',
'ext': 'mp4',
'title': '13h15, le dimanche... - Les mystères de Jésus',
'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
'timestamp': 1494156300,
'upload_date': '20170507',
},
'params': {
# m3u8 downloads
'skip_download': True,
},
}, {
# france3
'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
'only_matching': True,
}, {
# france4
'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html',
'only_matching': True,
}, {
# france5
'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html',
'only_matching': True,
}, {
# franceo
'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
'only_matching': True,
}, {
# france2 live
'url': 'https://www.france.tv/france-2/direct.html',
'only_matching': True,
}, {
'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
'only_matching': True,
}, {
'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._html_search_meta(
'id_video', webpage, 'video id', default=None)
catalogue = None
video_id = self._search_regex(
r'data-main-video=(["\'])(?P<id>(?:(?!\1).)+)\1',
webpage, 'video id', default=None, group='id')
if not video_id:
video_id = self._search_regex(
r'data-diffusion=["\'](\d+)', webpage, 'video id')
return self._extract_video(video_id, 'Pluzz')
video_id, catalogue = self._html_search_regex(
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
webpage, 'video ID').split('@')
return self._extract_video(video_id, catalogue)
class FranceTvInfoIE(FranceTVBaseInfoExtractor):
class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
_VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
_TEST = {
'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
'info_dict': {
'id': 'NI_983319',
'ext': 'mp4',
'title': 'Le Pen Reims',
'upload_date': '20170505',
'timestamp': 1493981780,
'duration': 16,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
video_id)
return self._extract_video(video['video_id'], video.get('catalog'))
class FranceTVInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<title>[^/?#&.]+)'
@ -233,124 +302,6 @@ class FranceTvInfoIE(FranceTVBaseInfoExtractor):
return self._extract_video(video_id, catalogue)
class FranceTVIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetv'
IE_DESC = 'France 2, 3, 4, 5 and Ô'
_VALID_URL = r'''(?x)
https?://
(?:
(?:www\.)?france[2345o]\.fr/
(?:
emissions/[^/]+/(?:videos|diffusions)|
emission/[^/]+|
videos|
jt
)
/|
embed\.francetv\.fr/\?ue=
)
(?P<id>[^/?]+)
'''
_TESTS = [
# france2
{
'url': 'http://www.france2.fr/emissions/13h15-le-samedi-le-dimanche/videos/75540104',
'md5': 'c03fc87cb85429ffd55df32b9fc05523',
'info_dict': {
'id': '109169362',
'ext': 'flv',
'title': '13h15, le dimanche...',
'description': 'md5:9a0932bb465f22d377a449be9d1a0ff7',
'upload_date': '20140914',
'timestamp': 1410693600,
},
},
# france3
{
'url': 'http://www.france3.fr/emissions/pieces-a-conviction/diffusions/13-11-2013_145575',
'md5': '679bb8f8921f8623bd658fa2f8364da0',
'info_dict': {
'id': '000702326_CAPP_PicesconvictionExtrait313022013_120220131722_Au',
'ext': 'mp4',
'title': 'Le scandale du prix des médicaments',
'description': 'md5:1384089fbee2f04fc6c9de025ee2e9ce',
'upload_date': '20131113',
'timestamp': 1384380000,
},
},
# france4
{
'url': 'http://www.france4.fr/emissions/hero-corp/videos/rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
'md5': 'a182bf8d2c43d88d46ec48fbdd260c1c',
'info_dict': {
'id': 'rhozet_herocorp_bonus_1_20131106_1923_06112013172108_F4',
'ext': 'mp4',
'title': 'Hero Corp Making of - Extrait 1',
'description': 'md5:c87d54871b1790679aec1197e73d650a',
'upload_date': '20131106',
'timestamp': 1383766500,
},
},
# france5
{
'url': 'http://www.france5.fr/emissions/c-a-dire/videos/quels_sont_les_enjeux_de_cette_rentree_politique__31-08-2015_908948?onglet=tous&page=1',
'md5': 'f6c577df3806e26471b3d21631241fd0',
'info_dict': {
'id': '123327454',
'ext': 'flv',
'title': 'C à dire ?! - Quels sont les enjeux de cette rentrée politique ?',
'description': 'md5:4a0d5cb5dce89d353522a84462bae5a4',
'upload_date': '20150831',
'timestamp': 1441035120,
},
},
# franceo
{
'url': 'http://www.franceo.fr/jt/info-soir/18-07-2015',
'md5': '47d5816d3b24351cdce512ad7ab31da8',
'info_dict': {
'id': '125377621',
'ext': 'flv',
'title': 'Infô soir',
'description': 'md5:01b8c6915a3d93d8bbbd692651714309',
'upload_date': '20150718',
'timestamp': 1437241200,
'duration': 414,
},
},
{
# francetv embed
'url': 'http://embed.francetv.fr/?ue=8d7d3da1e3047c42ade5a5d7dfd3fc87',
'info_dict': {
'id': 'EV_30231',
'ext': 'flv',
'title': 'Alcaline, le concert avec Calogero',
'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
'upload_date': '20150226',
'timestamp': 1424989860,
'duration': 5400,
},
},
{
'url': 'http://www.france4.fr/emission/highlander/diffusion-du-17-07-2015-04h05',
'only_matching': True,
},
{
'url': 'http://www.franceo.fr/videos/125377617',
'only_matching': True,
}
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_id, catalogue = self._html_search_regex(
r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
webpage, 'video ID').split('@')
return self._extract_video(video_id, catalogue)
class GenerationQuoiIE(InfoExtractor):
IE_NAME = 'france2.fr:generation-quoi'
_VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'

View File

@ -2,15 +2,11 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_urllib_parse_unquote_plus,
)
from ..compat import compat_HTTPError
from ..utils import (
determine_ext,
int_or_none,
js_to_json,
sanitized_Request,
ExtractorError,
urlencode_postdata
)
@ -20,6 +16,7 @@ class FunimationIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'funimation'
_TOKEN = None
_TESTS = [{
'url': 'https://www.funimation.com/shows/hacksign/role-play/',
@ -38,56 +35,38 @@ class FunimationIE(InfoExtractor):
}, {
'url': 'https://www.funimation.com/shows/attack-on-titan-junior-high/broadcast-dub-preview/',
'info_dict': {
'id': '9635',
'id': '210051',
'display_id': 'broadcast-dub-preview',
'ext': 'mp4',
'title': 'Attack on Titan: Junior High - Broadcast Dub Preview',
'description': 'md5:f8ec49c0aff702a7832cd81b8a44f803',
'thumbnail': r're:https?://.*\.(?:jpg|png)',
},
'skip': 'Access without user interaction is forbidden by CloudFlare',
'params': {
# m3u8 download
'skip_download': True,
},
}, {
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
'only_matching': True,
}]
_LOGIN_URL = 'http://www.funimation.com/login'
def _extract_cloudflare_session_ua(self, url):
ci_session_cookie = self._get_cookies(url).get('ci_session')
if ci_session_cookie:
ci_session = compat_urllib_parse_unquote_plus(ci_session_cookie.value)
# ci_session is a string serialized by PHP function serialize()
# This case is simple enough to use regular expressions only
return self._search_regex(
r'"user_agent";s:\d+:"([^"]+)"', ci_session, 'user agent',
default=None)
def _login(self):
(username, password) = self._get_login_info()
if username is None:
return
data = urlencode_postdata({
'email_field': username,
'password_field': password,
})
user_agent = self._extract_cloudflare_session_ua(self._LOGIN_URL)
if not user_agent:
user_agent = 'Mozilla/5.0 (Windows NT 5.2; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0'
login_request = sanitized_Request(self._LOGIN_URL, data, headers={
'User-Agent': user_agent,
'Content-Type': 'application/x-www-form-urlencoded'
})
login_page = self._download_webpage(
login_request, None, 'Logging in as %s' % username)
if any(p in login_page for p in ('funimation.com/logout', '>Log Out<')):
return
error = self._html_search_regex(
r'(?s)<div[^>]+id=["\']errorMessages["\'][^>]*>(.+?)</div>',
login_page, 'error messages', default=None)
if error:
raise ExtractorError('Unable to login: %s' % error, expected=True)
raise ExtractorError('Unable to log in')
try:
data = self._download_json(
'https://prod-api-funimationnow.dadcdigital.com/api/auth/login/',
None, 'Logging in as %s' % username, data=urlencode_postdata({
'username': username,
'password': password,
}))
self._TOKEN = data['token']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
error = self._parse_json(e.cause.read().decode(), None)['error']
raise ExtractorError(error, expected=True)
raise
def _real_initialize(self):
self._login()
@ -125,9 +104,12 @@ class FunimationIE(InfoExtractor):
description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
try:
headers = {}
if self._TOKEN:
headers['Authorization'] = 'Token %s' % self._TOKEN
sources = self._download_json(
'https://prod-api-funimationnow.dadcdigital.com/api/source/catalog/video/%s/signed/' % video_id,
video_id)['items']
video_id, headers=headers)['items']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
error = self._parse_json(e.cause.read(), video_id)['errors'][0]

View File

@ -75,6 +75,19 @@ class GDCVaultIE(InfoExtractor):
'format': 'jp', # The japanese audio
}
},
{
# gdc-player.html
'url': 'http://www.gdcvault.com/play/1435/An-American-engine-in-Tokyo',
'info_dict': {
'id': '1435',
'display_id': 'An-American-engine-in-Tokyo',
'ext': 'flv',
'title': 'An American Engine in Tokyo:/nThe collaboration of Epic Games and Square Enix/nFor THE LAST REMINANT',
},
'params': {
'skip_download': True, # Requires rtmpdump
},
},
]
def _login(self, webpage_url, display_id):
@ -128,7 +141,7 @@ class GDCVaultIE(InfoExtractor):
'title': title,
}
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/player.*?\.html.*?".*?</iframe>'
PLAYER_REGEX = r'<iframe src="(?P<xml_root>.+?)/(?:gdc-)?player.*?\.html.*?".*?</iframe>'
xml_root = self._html_search_regex(
PLAYER_REGEX, start_page, 'xml root', default=None)

View File

@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
@ -8,15 +10,15 @@ from ..utils import (
urlencode_postdata,
xpath_element,
xpath_text,
urljoin,
update_url_query,
js_to_json,
)
class Laola1TvEmbedIE(InfoExtractor):
IE_NAME = 'laola1tv:embed'
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/titanplayer\.php\?.*?\bvideoid=(?P<id>\d+)'
_TEST = {
_TESTS = [{
# flashvars.premium = "false";
'url': 'https://www.laola1.tv/titanplayer.php?videoid=708065&type=V&lang=en&portal=int&customer=1024',
'info_dict': {
@ -26,7 +28,30 @@ class Laola1TvEmbedIE(InfoExtractor):
'uploader': 'ITTF - International Table Tennis Federation',
'upload_date': '20161211',
},
}
}]
def _extract_token_url(self, stream_access_url, video_id, data):
return self._download_json(
stream_access_url, video_id, headers={
'Content-Type': 'application/json',
}, data=json.dumps(data).encode())['data']['stream-access'][0]
def _extract_formats(self, token_url, video_id):
token_doc = self._download_xml(
token_url, video_id, 'Downloading token',
headers=self.geo_verification_headers())
token_attrib = xpath_element(token_doc, './/token').attrib
if token_attrib['status'] != '0':
raise ExtractorError(
'Token error: %s' % token_attrib['comment'], expected=True)
formats = self._extract_akamai_formats(
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
video_id)
self._sort_formats(formats)
return formats
def _real_extract(self, url):
video_id = self._match_id(url)
@ -68,29 +93,16 @@ class Laola1TvEmbedIE(InfoExtractor):
else:
data_abo = urlencode_postdata(
dict((i, v) for i, v in enumerate(_v('req_liga_abos').split(','))))
token_url = self._download_json(
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access',
video_id, query={
stream_access_url = update_url_query(
'https://club.laola1.tv/sp/laola1/api/v3/user/session/premium/player/stream-access', {
'videoId': _v('id'),
'target': self._search_regex(r'vs_target = (\d+);', webpage, 'vs target'),
'label': _v('label'),
'area': _v('area'),
}, data=data_abo)['data']['stream-access'][0]
})
token_url = self._extract_token_url(stream_access_url, video_id, data_abo)
token_doc = self._download_xml(
token_url, video_id, 'Downloading token',
headers=self.geo_verification_headers())
token_attrib = xpath_element(token_doc, './/token').attrib
if token_attrib['status'] != '0':
raise ExtractorError(
'Token error: %s' % token_attrib['comment'], expected=True)
formats = self._extract_akamai_formats(
'%s?hdnea=%s' % (token_attrib['url'], token_attrib['auth']),
video_id)
self._sort_formats(formats)
formats = self._extract_formats(token_url, video_id)
categories_str = _v('meta_sports')
categories = categories_str.split(',') if categories_str else []
@ -107,7 +119,7 @@ class Laola1TvEmbedIE(InfoExtractor):
}
class Laola1TvIE(InfoExtractor):
class Laola1TvIE(Laola1TvEmbedIE):
IE_NAME = 'laola1tv'
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
@ -164,13 +176,42 @@ class Laola1TvIE(InfoExtractor):
if 'Dieser Livestream ist bereits beendet.' in webpage:
raise ExtractorError('This live stream has already finished.', expected=True)
iframe_url = urljoin(url, self._search_regex(
r'<iframe[^>]*?id="videoplayer"[^>]*?src="([^"]+)"',
webpage, 'iframe url'))
conf = self._parse_json(self._search_regex(
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
display_id, js_to_json)
video_id = conf['videoid']
config = self._download_json(conf['configUrl'], video_id, query={
'videoid': video_id,
'partnerid': conf['partnerid'],
'language': conf.get('language', ''),
'portal': conf.get('portalid', ''),
})
error = config.get('error')
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
video_data = config['video']
title = video_data['title']
is_live = video_data.get('isLivestream') and video_data.get('isLive')
meta = video_data.get('metaInformation')
sports = meta.get('sports')
categories = sports.split(',') if sports else []
token_url = self._extract_token_url(
video_data['streamAccess'], video_id,
video_data['abo']['required'])
formats = self._extract_formats(token_url, video_id)
return {
'_type': 'url',
'id': video_id,
'display_id': display_id,
'url': iframe_url,
'ie_key': 'Laola1TvEmbed',
'title': self._live_title(title) if is_live else title,
'description': video_data.get('description'),
'thumbnail': video_data.get('image'),
'categories': categories,
'formats': formats,
'is_live': is_live,
}

View File

@ -12,64 +12,62 @@ from ..utils import (
class MySpaceIE(InfoExtractor):
_VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
_VALID_URL = r'''(?x)
https?://
myspace\.com/[^/]+/
(?P<mediatype>
video/[^/]+/(?P<video_id>\d+)|
music/song/[^/?#&]+-(?P<song_id>\d+)-\d+(?:[/?#&]|$)
)
'''
_TESTS = [
{
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
'md5': '9c1483c106f4a695c47d2911feed50a7',
'info_dict': {
'id': '109594919',
'ext': 'mp4',
'title': 'Little Big Town',
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
'uploader': 'Five Minutes to the Stage',
'uploader_id': 'fiveminutestothestage',
'timestamp': 1414108751,
'upload_date': '20141023',
},
_TESTS = [{
'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
'md5': '9c1483c106f4a695c47d2911feed50a7',
'info_dict': {
'id': '109594919',
'ext': 'mp4',
'title': 'Little Big Town',
'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
'uploader': 'Five Minutes to the Stage',
'uploader_id': 'fiveminutestothestage',
'timestamp': 1414108751,
'upload_date': '20141023',
},
}, {
# songs
{
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
'md5': '1d7ee4604a3da226dd69a123f748b262',
'info_dict': {
'id': '93388656',
'ext': 'm4a',
'title': 'Of weakened soul...',
'uploader': 'Killsorrow',
'uploader_id': 'killsorrow',
},
}, {
'add_ie': ['Youtube'],
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
'info_dict': {
'id': 'xqds0B_meys',
'ext': 'webm',
'title': 'Three Days Grace - Animal I Have Become',
'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
'uploader': 'ThreeDaysGraceVEVO',
'uploader_id': 'ThreeDaysGraceVEVO',
'upload_date': '20091002',
},
}, {
'add_ie': ['Youtube'],
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
'info_dict': {
'id': 'ypWvQgnJrSU',
'ext': 'mp4',
'title': 'Starset - First Light',
'description': 'md5:2d5db6c9d11d527683bcda818d332414',
'uploader': 'Yumi K',
'uploader_id': 'SorenPromotions',
'upload_date': '20140725',
}
'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
'md5': '1d7ee4604a3da226dd69a123f748b262',
'info_dict': {
'id': '93388656',
'ext': 'm4a',
'title': 'Of weakened soul...',
'uploader': 'Killsorrow',
'uploader_id': 'killsorrow',
},
]
}, {
'add_ie': ['Youtube'],
'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
'info_dict': {
'id': 'xqds0B_meys',
'ext': 'webm',
'title': 'Three Days Grace - Animal I Have Become',
'description': 'md5:8bd86b3693e72a077cf863a8530c54bb',
'uploader': 'ThreeDaysGraceVEVO',
'uploader_id': 'ThreeDaysGraceVEVO',
'upload_date': '20091002',
},
}, {
'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
'only_matching': True,
}, {
'url': 'https://myspace.com/thelargemouthbassband/music/song/02-pure-eyes.mp3-94422330-105113388',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
video_id = mobj.group('video_id') or mobj.group('song_id')
is_song = mobj.group('mediatype').startswith('music/song')
webpage = self._download_webpage(url, video_id)
player_url = self._search_regex(

View File

@ -5,10 +5,8 @@ import re
from .common import InfoExtractor
from .theplatform import ThePlatformIE
from .adobepass import AdobePassIE
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
find_xpath_attr,
lowercase_escape,
smuggle_url,
unescapeHTML,
update_url_query,
@ -17,7 +15,7 @@ from ..utils import (
class NBCIE(AdobePassIE):
_VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)'
_VALID_URL = r'(?P<permalink>https?://(?:www\.)?nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+))'
_TESTS = [
{
@ -36,16 +34,6 @@ class NBCIE(AdobePassIE):
'skip_download': True,
},
},
{
'url': 'http://www.nbc.com/the-tonight-show/episodes/176',
'info_dict': {
'id': '176',
'ext': 'flv',
'title': 'Ricky Gervais, Steven Van Zandt, ILoveMakonnen',
'description': 'A brand new episode of The Tonight Show welcomes Ricky Gervais, Steven Van Zandt and ILoveMakonnen.',
},
'skip': '404 Not Found',
},
{
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
'info_dict': {
@ -63,11 +51,6 @@ class NBCIE(AdobePassIE):
},
'skip': 'Only works from US',
},
{
# This video has expired but with an escaped embedURL
'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515',
'only_matching': True,
},
{
# HLS streams requires the 'hdnea3' cookie
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
@ -88,59 +71,38 @@ class NBCIE(AdobePassIE):
]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info = {
'_type': 'url_transparent',
'ie_key': 'ThePlatform',
'id': video_id,
permalink, video_id = re.match(self._VALID_URL, url).groups()
video_data = self._download_json(
'https://api.nbc.com/v3/videos', video_id, query={
'filter[permalink]': permalink,
})['data'][0]['attributes']
query = {
'mbr': 'true',
'manifest': 'm3u',
}
video_id = video_data['guid']
title = video_data['title']
if video_data.get('entitlement') == 'auth':
resource = self._get_mvpd_resource(
'nbcentertainment', title, video_id,
video_data.get('vChipRating'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'nbcentertainment', resource)
theplatform_url = smuggle_url(update_url_query(
'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
query), {'force_smil_url': True})
return {
'_type': 'url_transparent',
'id': video_id,
'title': title,
'url': theplatform_url,
'description': video_data.get('description'),
'keywords': video_data.get('keywords'),
'season_number': int_or_none(video_data.get('seasonNumber')),
'episode_number': int_or_none(video_data.get('episodeNumber')),
'series': video_data.get('showName'),
'ie_key': 'ThePlatform',
}
video_data = None
preload = self._search_regex(
r'PRELOAD\s*=\s*({.+})', webpage, 'preload data', default=None)
if preload:
preload_data = self._parse_json(preload, video_id)
path = compat_urllib_parse_urlparse(url).path.rstrip('/')
entity_id = preload_data.get('xref', {}).get(path)
video_data = preload_data.get('entities', {}).get(entity_id)
if video_data:
query = {
'mbr': 'true',
'manifest': 'm3u',
}
video_id = video_data['guid']
title = video_data['title']
if video_data.get('entitlement') == 'auth':
resource = self._get_mvpd_resource(
'nbcentertainment', title, video_id,
video_data.get('vChipRating'))
query['auth'] = self._extract_mvpd_auth(
url, video_id, 'nbcentertainment', resource)
theplatform_url = smuggle_url(update_url_query(
'http://link.theplatform.com/s/NnzsPC/media/guid/2410887629/' + video_id,
query), {'force_smil_url': True})
info.update({
'id': video_id,
'title': title,
'url': theplatform_url,
'description': video_data.get('description'),
'keywords': video_data.get('keywords'),
'season_number': int_or_none(video_data.get('seasonNumber')),
'episode_number': int_or_none(video_data.get('episodeNumber')),
'series': video_data.get('showName'),
})
else:
theplatform_url = unescapeHTML(lowercase_escape(self._html_search_regex(
[
r'(?:class="video-player video-player-full" data-mpx-url|class="player" src)="(.*?)"',
r'<iframe[^>]+src="((?:https?:)?//player\.theplatform\.com/[^"]+)"',
r'"embedURL"\s*:\s*"([^"]+)"'
],
webpage, 'theplatform url').replace('_no_endcard', '').replace('\\/', '/')))
if theplatform_url.startswith('//'):
theplatform_url = 'http:' + theplatform_url
info['url'] = smuggle_url(theplatform_url, {'source_url': url})
return info
class NBCSportsVPlayerIE(InfoExtractor):

View File

@ -0,0 +1,33 @@
from __future__ import unicode_literals
from .nuevo import NuevoBaseIE
class NonkTubeIE(NuevoBaseIE):
_VALID_URL = r'https?://(?:www\.)?nonktube\.com/(?:(?:video|embed)/|media/nuevo/embed\.php\?.*?\bid=)(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.nonktube.com/video/118636/sensual-wife-uncensored-fucked-in-hairy-pussy-and-facialized',
'info_dict': {
'id': '118636',
'ext': 'mp4',
'title': 'Sensual Wife Uncensored Fucked In Hairy Pussy And Facialized',
'age_limit': 18,
'duration': 1150.98,
},
'params': {
'skip_download': True,
}
}, {
'url': 'https://www.nonktube.com/embed/118636',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
info = self._extract_nuevo(
'https://www.nonktube.com/media/nuevo/econfig.php?key=%s'
% video_id, video_id)
info['age_limit'] = 18
return info

View File

@ -148,13 +148,34 @@ class NRKBaseIE(InfoExtractor):
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
# TODO: extract chapters when https://github.com/rg3/youtube-dl/pull/9409 is merged
for entry in entries:
entry.update(common_info)
for f in entry['formats']:
f['vcodec'] = vcodec
points = data.get('shortIndexPoints')
if isinstance(points, list):
chapters = []
for next_num, point in enumerate(points, start=1):
if not isinstance(point, dict):
continue
start_time = parse_duration(point.get('startPoint'))
if start_time is None:
continue
end_time = parse_duration(
data.get('duration')
if next_num == len(points)
else points[next_num].get('startPoint'))
if end_time is None:
continue
chapters.append({
'start_time': start_time,
'end_time': end_time,
'title': point.get('title'),
})
if chapters and len(entries) == 1:
entries[0]['chapters'] = chapters
return self.playlist_result(entries, video_id, title, description)

View File

@ -10,9 +10,10 @@ from ..utils import (
class NuevoBaseIE(InfoExtractor):
def _extract_nuevo(self, config_url, video_id):
def _extract_nuevo(self, config_url, video_id, headers={}):
config = self._download_xml(
config_url, video_id, transform_source=lambda s: s.strip())
config_url, video_id, transform_source=lambda s: s.strip(),
headers=headers)
title = xpath_text(config, './title', 'title', fatal=True).strip()
video_id = xpath_text(config, './mediaid', default=video_id)

View File

@ -3,7 +3,10 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..compat import (
compat_str,
compat_HTTPError,
)
from ..utils import (
clean_html,
ExtractorError,
@ -11,6 +14,7 @@ from ..utils import (
strip_or_none,
unified_timestamp,
urljoin,
urlencode_postdata,
)
@ -34,6 +38,32 @@ class PacktPubIE(PacktPubBaseIE):
'upload_date': '20170331',
},
}
_NETRC_MACHINE = 'packtpub'
_TOKEN = None
def _real_initialize(self):
(username, password) = self._get_login_info()
if username is None:
return
webpage = self._download_webpage(self._PACKT_BASE, None)
login_form = self._form_hidden_inputs(
'packt-user-login-form', webpage)
login_form.update({
'email': username,
'password': password,
})
self._download_webpage(
self._PACKT_BASE, None, 'Logging in as %s' % username,
data=urlencode_postdata(login_form))
try:
self._TOKEN = self._download_json(
'%s/users/tokens/sessions' % self._MAPT_REST, None,
'Downloading Authorization Token')['data']['token']
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 404):
message = self._parse_json(e.cause.read().decode(), None)['message']
raise ExtractorError(message, expected=True)
raise
def _handle_error(self, response):
if response.get('status') != 'success':
@ -51,14 +81,17 @@ class PacktPubIE(PacktPubBaseIE):
course_id, chapter_id, video_id = mobj.group(
'course_id', 'chapter_id', 'id')
headers = {}
if self._TOKEN:
headers['Authorization'] = self._TOKEN
video = self._download_json(
'%s/users/me/products/%s/chapters/%s/sections/%s'
% (self._MAPT_REST, course_id, chapter_id, video_id), video_id,
'Downloading JSON video')['data']
'Downloading JSON video', headers=headers)['data']
content = video.get('content')
if not content:
raise ExtractorError('This video is locked', expected=True)
self.raise_login_required('This video is locked')
video_url = content['file']

View File

@ -10,6 +10,7 @@ from ..utils import (
int_or_none,
float_or_none,
js_to_json,
orderedSet,
strip_jsonp,
strip_or_none,
unified_strdate,
@ -264,6 +265,13 @@ class PBSIE(InfoExtractor):
},
'playlist_count': 2,
},
{
'url': 'http://www.pbs.org/wgbh/americanexperience/films/great-war/',
'info_dict': {
'id': 'great-war',
},
'playlist_count': 3,
},
{
'url': 'http://www.pbs.org/wgbh/americanexperience/films/death/player/',
'info_dict': {
@ -382,10 +390,10 @@ class PBSIE(InfoExtractor):
# tabbed frontline videos
MULTI_PART_REGEXES = (
r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"',
r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)',
r'<a[^>]+href=["\']#(?:video-|part)\d+["\'][^>]+data-cove[Ii]d=["\'](\d+)',
)
for p in MULTI_PART_REGEXES:
tabbed_videos = re.findall(p, webpage)
tabbed_videos = orderedSet(re.findall(p, webpage))
if tabbed_videos:
return tabbed_videos, presumptive_id, upload_date, description

View File

@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:[a-z]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
@ -97,6 +97,9 @@ class PornHubIE(InfoExtractor):
}, {
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
'only_matching': True,
}, {
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
'only_matching': True,
}]
@staticmethod

View File

@ -13,21 +13,20 @@ class RMCDecouverteIE(InfoExtractor):
_VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/mediaplayer-replay.*?\bid=(?P<id>\d+)'
_TEST = {
'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=1430&title=LES%20HEROS%20DU%2088e%20ETAGE',
'url': 'http://rmcdecouverte.bfmtv.com/mediaplayer-replay/?id=13502&title=AQUAMEN:LES%20ROIS%20DES%20AQUARIUMS%20:UN%20DELICIEUX%20PROJET',
'info_dict': {
'id': '5111223049001',
'id': '5419055995001',
'ext': 'mp4',
'title': ': LES HEROS DU 88e ETAGE',
'description': 'Découvrez comment la bravoure de deux hommes dans la Tour Nord du World Trade Center a sauvé la vie d\'innombrables personnes le 11 septembre 2001.',
'title': 'UN DELICIEUX PROJET',
'description': 'md5:63610df7c8b1fc1698acd4d0d90ba8b5',
'uploader_id': '1969646226001',
'upload_date': '20160904',
'timestamp': 1472951103,
'upload_date': '20170502',
'timestamp': 1493745308,
},
'params': {
# rtmp download
'skip_download': True,
},
'skip': 'Only works from France',
'skip': 'only available for a week',
}
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s'
@ -35,5 +34,12 @@ class RMCDecouverteIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(brightcove_legacy_url).query)['@videoPlayer'][0]
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
if brightcove_legacy_url:
brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
brightcove_legacy_url).query)['@videoPlayer'][0]
else:
brightcove_id = self._search_regex(
r'data-video-id=["\'](\d+)', webpage, 'brightcove id')
return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew',
brightcove_id)

View File

@ -80,14 +80,33 @@ class ThePlatformBaseIE(OnceIE):
'url': src,
})
duration = info.get('duration')
tp_chapters = info.get('chapters', [])
chapters = []
if tp_chapters:
def _add_chapter(start_time, end_time):
start_time = float_or_none(start_time, 1000)
end_time = float_or_none(end_time, 1000)
if start_time is None or end_time is None:
return
chapters.append({
'start_time': start_time,
'end_time': end_time,
})
for chapter in tp_chapters[:-1]:
_add_chapter(chapter.get('startTime'), chapter.get('endTime'))
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
return {
'title': info['title'],
'subtitles': subtitles,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
'duration': int_or_none(info.get('duration'), 1000),
'duration': float_or_none(duration, 1000),
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
'uploader': info.get('billingCode'),
'chapters': chapters,
}
def _extract_theplatform_metadata(self, path, video_id):

View File

@ -13,6 +13,7 @@ from ..utils import (
xpath_attr,
update_url_query,
ExtractorError,
strip_or_none,
)
@ -163,17 +164,21 @@ class TurnerBaseIE(AdobePassIE):
'height': int_or_none(image.get('height')),
} for image in video_data.findall('images/image')]
is_live = xpath_text(video_data, 'isLive') == 'true'
return {
'id': video_id,
'title': title,
'title': self._live_title(title) if is_live else title,
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'description': xpath_text(video_data, 'description'),
'thumbnail': xpath_text(video_data, 'poster'),
'description': strip_or_none(xpath_text(video_data, 'description')),
'duration': parse_duration(xpath_text(video_data, 'length') or xpath_text(video_data, 'trt')),
'timestamp': self._extract_timestamp(video_data),
'upload_date': xpath_attr(video_data, 'metas', 'version'),
'series': xpath_text(video_data, 'showTitle'),
'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
'is_live': is_live,
}

View File

@ -20,7 +20,7 @@ from ..utils import (
class ViceBaseIE(AdobePassIE):
def _extract_preplay_video(self, url, webpage):
def _extract_preplay_video(self, url, locale, webpage):
watch_hub_data = extract_attributes(self._search_regex(
r'(?s)(<watch-hub\s*.+?</watch-hub>)', webpage, 'watch hub'))
video_id = watch_hub_data['vms-id']
@ -32,7 +32,8 @@ class ViceBaseIE(AdobePassIE):
resource = self._get_mvpd_resource(
'VICELAND', title, video_id,
watch_hub_data.get('video-rating'))
query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource)
query['tvetoken'] = self._extract_mvpd_auth(
url, video_id, 'VICELAND', resource)
# signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
@ -45,11 +46,14 @@ class ViceBaseIE(AdobePassIE):
try:
host = 'www.viceland' if is_locked else self._PREPLAY_HOST
preplay = self._download_json('https://%s.com/en_us/preplay/%s' % (host, video_id), video_id, query=query)
preplay = self._download_json(
'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
video_id, query=query)
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
error = json.loads(e.cause.read().decode())
raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True)
raise ExtractorError('%s said: %s' % (
self.IE_NAME, error['details']), expected=True)
raise
video_data = preplay['video']
@ -88,41 +92,30 @@ class ViceBaseIE(AdobePassIE):
class ViceIE(ViceBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)'
IE_NAME = 'vice'
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
'md5': 'e9d77741f9e42ba583e683cd170660f7',
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
'info_dict': {
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
'ext': 'flv',
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
'duration': 725.983,
'title': 'Monkey Labs of Holland',
'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://www.vice.com/video/how-to-hack-a-car',
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': {
'id': '3jstaBeXgAs',
'ext': 'mp4',
'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
'uploader_id': 'MotherboardTV',
'uploader': 'Motherboard',
'upload_date': '20140529',
},
'add_ie': ['Youtube'],
}, {
'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
'md5': '',
'info_dict': {
'id': '5816510690b70e6c5fd39a56',
'ext': 'mp4',
'uploader': 'Waypoint',
'title': 'The Signal From Tölva',
'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
'uploader_id': '57f7d621e05ca860fa9ccaf9',
'timestamp': 1477941983938,
'timestamp': 1477941983,
'upload_date': '20161031',
},
'params': {
# m3u8 download
@ -130,19 +123,31 @@ class ViceIE(ViceBaseIE):
},
'add_ie': ['UplynkPreplay'],
}, {
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'only_matching': True,
'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
'info_dict': {
'id': '581b12b60a0e1f4c0fb6ea2f',
'ext': 'mp4',
'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
'uploader': 'VICE',
'uploader_id': '57a204088cb727dec794c67b',
'timestamp': 1485368119,
'upload_date': '20170125',
'age_limit': 14,
},
'params': {
# AES-encrypted m3u8
'skip_download': True,
},
'add_ie': ['UplynkPreplay'],
}, {
'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
'only_matching': True,
}, {
'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
'only_matching': True,
}]
_PREPLAY_HOST = 'video.vice'
def _real_extract(self, url):
video_id = self._match_id(url)
locale, video_id = re.match(self._VALID_URL, url).groups()
webpage, urlh = self._download_webpage_handle(url, video_id)
embed_code = self._search_regex(
r'embedCode=([^&\'"]+)', webpage,
@ -153,10 +158,11 @@ class ViceIE(ViceBaseIE):
r'data-youtube-id="([^"]+)"', webpage, 'youtube id', default=None)
if youtube_id:
return self.url_result(youtube_id, 'Youtube')
return self._extract_preplay_video(urlh.geturl(), webpage)
return self._extract_preplay_video(urlh.geturl(), locale, webpage)
class ViceShowIE(InfoExtractor):
IE_NAME = 'vice:show'
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
_TEST = {
@ -183,6 +189,86 @@ class ViceShowIE(InfoExtractor):
r'<title>(.+?)</title>', webpage, 'title', default=None)
if title:
title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
description = self._html_search_meta('description', webpage, 'description')
description = self._html_search_meta(
'description', webpage, 'description')
return self.playlist_result(entries, show_id, title, description)
class ViceArticleIE(InfoExtractor):
IE_NAME = 'vice:article'
_VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
_TESTS = [{
'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
'info_dict': {
'id': '58dc0a3dee202d2a0ccfcbd8',
'ext': 'mp4',
'title': 'Mormon War on Porn ',
'description': 'md5:ad396a2481e7f8afb5ed486878421090',
'uploader': 'VICE',
'uploader_id': '57a204088cb727dec794c693',
'timestamp': 1489160690,
'upload_date': '20170310',
},
'params': {
# AES-encrypted m3u8
'skip_download': True,
},
'add_ie': ['UplynkPreplay'],
}, {
'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': {
'id': '3jstaBeXgAs',
'ext': 'mp4',
'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
'uploader_id': 'MotherboardTV',
'uploader': 'Motherboard',
'upload_date': '20140529',
},
'add_ie': ['Youtube'],
}, {
'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
'only_matching': True,
}, {
'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
prefetch_data = self._parse_json(self._search_regex(
r'window\.__PREFETCH_DATA\s*=\s*({.*});',
webpage, 'prefetch data'), display_id)
body = prefetch_data['body']
def _url_res(video_url, ie_key):
return {
'_type': 'url_transparent',
'url': video_url,
'display_id': display_id,
'ie_key': ie_key,
}
embed_code = self._search_regex(
r'embedCode=([^&\'"]+)', body,
'ooyala embed code', default=None)
if embed_code:
return _url_res('ooyala:%s' % embed_code, 'Ooyala')
youtube_url = self._html_search_regex(
r'<iframe[^>]+src="(.*youtube\.com/.*)"',
body, 'YouTube URL', default=None)
if youtube_url:
return _url_res(youtube_url, 'Youtube')
video_url = self._html_search_regex(
r'data-video-url="([^"]+)"',
prefetch_data['embed_code'], 'video URL')
return _url_res(video_url, ViceIE.ie_key())

View File

@ -1,11 +1,13 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .vice import ViceBaseIE
class VicelandIE(ViceBaseIE):
_VALID_URL = r'https?://(?:www\.)?viceland\.com/[^/]+/video/[^/]+/(?P<id>[a-f0-9]+)'
_VALID_URL = r'https?://(?:www\.)?viceland\.com/(?P<locale>[^/]+)/video/[^/]+/(?P<id>[a-f0-9]+)'
_TEST = {
'url': 'https://www.viceland.com/en_us/video/trapped/588a70d0dba8a16007de7316',
'info_dict': {
@ -24,10 +26,13 @@ class VicelandIE(ViceBaseIE):
'skip_download': True,
},
'add_ie': ['UplynkPreplay'],
'skip': '404',
}
_PREPLAY_HOST = 'www.viceland'
def _real_extract(self, url):
video_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
locale = mobj.group('locale')
webpage = self._download_webpage(url, video_id)
return self._extract_preplay_video(url, webpage)
return self._extract_preplay_video(url, locale, webpage)

View File

@ -1,7 +1,6 @@
# coding: utf-8
from __future__ import unicode_literals
import random
import re
from .common import InfoExtractor
@ -11,6 +10,7 @@ from ..utils import (
float_or_none,
parse_age_limit,
qualities,
random_birthday,
try_get,
unified_timestamp,
urljoin,
@ -47,13 +47,10 @@ class VideoPressIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
query = random_birthday('birth_year', 'birth_month', 'birth_day')
video = self._download_json(
'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
video_id, query={
'birth_month': random.randint(1, 12),
'birth_day': random.randint(1, 31),
'birth_year': random.randint(1950, 1995),
})
video_id, query=query)
title = video['title']

View File

@ -112,21 +112,41 @@ class VRVIE(VRVBaseIE):
audio_locale = streams_json.get('audio_locale')
formats = []
for stream_id, stream in streams_json.get('streams', {}).get('adaptive_hls', {}).items():
stream_url = stream.get('url')
if not stream_url:
continue
stream_id = stream_id or audio_locale
m3u8_formats = self._extract_m3u8_formats(
stream_url, video_id, 'mp4', m3u8_id=stream_id,
note='Downloading %s m3u8 information' % stream_id,
fatal=False)
if audio_locale:
for f in m3u8_formats:
f['language'] = audio_locale
formats.extend(m3u8_formats)
for stream_type, streams in streams_json.get('streams', {}).items():
if stream_type in ('adaptive_hls', 'adaptive_dash'):
for stream in streams.values():
stream_url = stream.get('url')
if not stream_url:
continue
stream_id = stream.get('hardsub_locale') or audio_locale
format_id = '%s-%s' % (stream_type.split('_')[1], stream_id)
if stream_type == 'adaptive_hls':
adaptive_formats = self._extract_m3u8_formats(
stream_url, video_id, 'mp4', m3u8_id=format_id,
note='Downloading %s m3u8 information' % stream_id,
fatal=False)
else:
adaptive_formats = self._extract_mpd_formats(
stream_url, video_id, mpd_id=format_id,
note='Downloading %s MPD information' % stream_id,
fatal=False)
if audio_locale:
for f in adaptive_formats:
if f.get('acodec') != 'none':
f['language'] = audio_locale
formats.extend(adaptive_formats)
self._sort_formats(formats)
subtitles = {}
for subtitle in streams_json.get('subtitles', {}).values():
subtitle_url = subtitle.get('url')
if not subtitle_url:
continue
subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
'url': subtitle_url,
'ext': subtitle.get('format', 'ass'),
})
thumbnails = []
for thumbnail in video_data.get('images', {}).get('thumbnails', []):
thumbnail_url = thumbnail.get('source')
@ -142,6 +162,7 @@ class VRVIE(VRVBaseIE):
'id': video_id,
'title': title,
'formats': formats,
'subtitles': subtitles,
'thumbnails': thumbnails,
'description': video_data.get('description'),
'duration': float_or_none(video_data.get('duration_ms'), 1000),

View File

@ -38,7 +38,6 @@ from ..utils import (
parse_duration,
remove_quotes,
remove_start,
sanitized_Request,
smuggle_url,
str_to_int,
try_get,
@ -54,7 +53,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
"""Provide base functions for Youtube extractors"""
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
_TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
_PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password'
_LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
_CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
_NETRC_MACHINE = 'youtube'
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
@ -96,72 +99,150 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
login_form = self._hidden_inputs(login_page)
login_form.update({
'checkConnection': 'youtube',
'Email': username,
'Passwd': password,
})
login_results = self._download_webpage(
self._PASSWORD_CHALLENGE_URL, None,
note='Logging in', errnote='unable to log in', fatal=False,
data=urlencode_postdata(login_form))
if login_results is False:
return False
error_msg = self._html_search_regex(
r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<',
login_results, 'error message', default=None)
if error_msg:
raise ExtractorError('Unable to login: %s' % error_msg, expected=True)
if re.search(r'id="errormsg_0_Passwd"', login_results) is not None:
raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True)
# Two-Factor
# TODO add SMS and phone call support - these require making a request and then prompting the user
if re.search(r'(?i)<form[^>]+id="challenge"', login_results) is not None:
tfa_code = self._get_tfa_info('2-step verification code')
if not tfa_code:
self._downloader.report_warning(
'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
return False
tfa_code = remove_start(tfa_code, 'G-')
tfa_form_strs = self._form_hidden_inputs('challenge', login_results)
tfa_form_strs.update({
'Pin': tfa_code,
'TrustDevice': 'on',
def req(url, f_req, note, errnote):
data = login_form.copy()
data.update({
'pstMsg': 1,
'checkConnection': 'youtube',
'checkedDomains': 'youtube',
'hl': 'en',
'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
'f.req': json.dumps(f_req),
'flowName': 'GlifWebSignIn',
'flowEntry': 'ServiceLogin',
})
return self._download_json(
url, None, note=note, errnote=errnote,
transform_source=lambda s: re.sub(r'^[^[]*', '', s),
fatal=False,
data=urlencode_postdata(data), headers={
'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
'Google-Accounts-XSRF': 1,
})
tfa_data = urlencode_postdata(tfa_form_strs)
def warn(message):
self._downloader.report_warning(message)
tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
tfa_results = self._download_webpage(
tfa_req, None,
note='Submitting TFA code', errnote='unable to submit tfa', fatal=False)
lookup_req = [
username,
None, [], None, 'US', None, None, 2, False, True,
[
None, None,
[2, 1, None, 1,
'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
None, [], 4],
1, [None, None, []], None, None, None, True
],
username,
]
if tfa_results is False:
return False
lookup_results = req(
self._LOOKUP_URL, lookup_req,
'Looking up account info', 'Unable to look up account info')
if re.search(r'(?i)<form[^>]+id="challenge"', tfa_results) is not None:
self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.')
return False
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', tfa_results) is not None:
self._downloader.report_warning('unable to log in - did the page structure change?')
return False
if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None:
self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.')
return False
if re.search(r'(?i)<form[^>]+id="gaia_loginform"', login_results) is not None:
self._downloader.report_warning('unable to log in: bad username or password')
if lookup_results is False:
return False
user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
if not user_hash:
warn('Unable to extract user hash')
return False
challenge_req = [
user_hash,
None, 1, None, [1, None, None, None, [password, None, True]],
[
None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
1, [None, None, []], None, None, None, True
]]
challenge_results = req(
self._CHALLENGE_URL, challenge_req,
'Logging in', 'Unable to log in')
if challenge_results is False:
return
login_res = try_get(challenge_results, lambda x: x[0][5], list)
if login_res:
login_msg = try_get(login_res, lambda x: x[5], compat_str)
warn(
'Unable to login: %s' % 'Invalid password'
if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
return False
res = try_get(challenge_results, lambda x: x[0][-1], list)
if not res:
warn('Unable to extract result entry')
return False
tfa = try_get(res, lambda x: x[0][0], list)
if tfa:
tfa_str = try_get(tfa, lambda x: x[2], compat_str)
if tfa_str == 'TWO_STEP_VERIFICATION':
# SEND_SUCCESS - TFA code has been successfully sent to phone
# QUOTA_EXCEEDED - reached the limit of TFA codes
status = try_get(tfa, lambda x: x[5], compat_str)
if status == 'QUOTA_EXCEEDED':
warn('Exceeded the limit of TFA codes, try later')
return False
tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
if not tl:
warn('Unable to extract TL')
return False
tfa_code = self._get_tfa_info('2-step verification code')
if not tfa_code:
warn(
'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
'(Note that only TOTP (Google Authenticator App) codes work at this time.)')
return False
tfa_code = remove_start(tfa_code, 'G-')
tfa_req = [
user_hash, None, 2, None,
[
9, None, None, None, None, None, None, None,
[None, tfa_code, True, 2]
]]
tfa_results = req(
self._TFA_URL.format(tl), tfa_req,
'Submitting TFA code', 'Unable to submit TFA code')
if tfa_results is False:
return False
tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
if tfa_res:
tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
warn(
'Unable to finish TFA: %s' % 'Invalid TFA code'
if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
return False
check_cookie_url = try_get(
tfa_results, lambda x: x[0][-1][2], compat_str)
else:
check_cookie_url = try_get(res, lambda x: x[2], compat_str)
if not check_cookie_url:
warn('Unable to extract CheckCookie URL')
return False
check_cookie_results = self._download_webpage(
check_cookie_url, None, 'Checking cookie', fatal=False)
if check_cookie_results is False:
return False
if 'https://myaccount.google.com/' not in check_cookie_results:
warn('Unable to log in')
return False
return True
def _real_initialize(self):
@ -1257,6 +1338,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
@staticmethod
def _extract_chapters(description, duration):
if not description:
return None
chapter_lines = re.findall(
r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
description)
if not chapter_lines:
return None
chapters = []
for next_num, (chapter_line, time_point) in enumerate(
chapter_lines, start=1):
start_time = parse_duration(time_point)
if start_time is None:
continue
end_time = (duration if next_num == len(chapter_lines)
else parse_duration(chapter_lines[next_num][1]))
if end_time is None:
continue
chapter_title = re.sub(
r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
chapter_title = re.sub(r'\s+', ' ', chapter_title)
chapters.append({
'start_time': start_time,
'end_time': end_time,
'title': chapter_title,
})
return chapters
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
@ -1399,9 +1509,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_title = '_'
# description
video_description = get_element_by_id("eow-description", video_webpage)
description_original = video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
video_description = re.sub(r'''(?x)
description_original = video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]*"\s+)*?
(?:title|href)="([^"]+)"\s+
@ -1558,6 +1668,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if self._downloader.params.get('writeannotations', False):
video_annotations = self._extract_annotations(video_id)
chapters = self._extract_chapters(description_original, video_duration)
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
self.report_rtmp_download()
formats = [{
@ -1790,6 +1902,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'duration': video_duration,
'age_limit': 18 if age_gate else 0,
'annotations': video_annotations,
'chapters': chapters,
'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
'view_count': view_count,
'like_count': like_count,

View File

@ -11,6 +11,7 @@ import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
@ -2097,6 +2098,58 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
return new_req
def _multipart_encode_impl(data, boundary):
content_type = 'multipart/form-data; boundary=%s' % boundary
out = b''
for k, v in data.items():
out += b'--' + boundary.encode('ascii') + b'\r\n'
if isinstance(k, compat_str):
k = k.encode('utf-8')
if isinstance(v, compat_str):
v = v.encode('utf-8')
# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
# suggests sending UTF-8 directly. Firefox sends UTF-8, too
content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
if boundary.encode('ascii') in content:
raise ValueError('Boundary overlaps with data')
out += content
out += b'--' + boundary.encode('ascii') + b'--\r\n'
return out, content_type
def multipart_encode(data, boundary=None):
'''
Encode a dict to RFC 7578-compliant form-data
data:
A dict where keys and values can be either Unicode or bytes-like
objects.
boundary:
If specified a Unicode object, it's used as the boundary. Otherwise
a random boundary is generated.
Reference: https://tools.ietf.org/html/rfc7578
'''
has_specified_boundary = boundary is not None
while True:
if boundary is None:
boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
try:
out, content_type = _multipart_encode_impl(data, boundary)
break
except ValueError:
if has_specified_boundary:
raise
boundary = None
return out, content_type
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
if isinstance(key_or_keys, (list, tuple)):
for key in key_or_keys:
@ -3760,3 +3813,11 @@ def write_xattr(path, key, value):
"Couldn't find a tool to set the xattrs. "
"Install either the python 'xattr' module, "
"or the 'xattr' binary.")
def random_birthday(year_field, month_field, day_field):
return {
year_field: str(random.randint(1950, 1995)),
month_field: str(random.randint(1, 12)),
day_field: str(random.randint(1, 31)),
}

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals
__version__ = '2017.05.01'
__version__ = '2017.05.09'