This commit is contained in:
Gilles Habran 2016-05-25 13:13:07 +02:00
commit d6318da4e7
48 changed files with 1359 additions and 353 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.16*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.05.21.2*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.16** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.05.21.2**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -35,7 +35,7 @@ $ youtube-dl -v <your command line>
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2016.05.16 [debug] youtube-dl version 2016.05.21.2
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -172,3 +172,4 @@ blahgeek
Kevin Deldycke Kevin Deldycke
inondle inondle
Tomáš Čech Tomáš Čech
Déstin Reed

View File

@ -25,7 +25,7 @@ If you do not have curl, you can alternatively use a recent wget:
sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
sudo chmod a+rx /usr/local/bin/youtube-dl sudo chmod a+rx /usr/local/bin/youtube-dl
Windows users can [download a .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in their home directory or any other location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29). Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](http://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/). OS X users can install **youtube-dl** with [Homebrew](http://brew.sh/).
@ -434,7 +434,7 @@ You can use `--ignore-config` if you want to disable the configuration file for
### Authentication with `.netrc` file ### Authentication with `.netrc` file
You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a`.netrc` file in your `$HOME` and restrict permissions to read/write by you only: You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](http://stackoverflow.com/tags/.netrc/info) on per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by you only:
``` ```
touch $HOME/.netrc touch $HOME/.netrc
chmod a-rwx,u+rw $HOME/.netrc chmod a-rwx,u+rw $HOME/.netrc
@ -694,6 +694,10 @@ hash -r
Again, from then on you'll be able to update with `sudo youtube-dl -U`. Again, from then on you'll be able to update with `sudo youtube-dl -U`.
### youtube-dl is extremely slow to start on Windows
Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists ### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos. YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.

View File

@ -33,6 +33,8 @@ if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: th
useless_files=$(find youtube_dl -type f -not -name '*.py') useless_files=$(find youtube_dl -type f -not -name '*.py')
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
/bin/echo -e "\n### First of all, testing..." /bin/echo -e "\n### First of all, testing..."
make clean make clean

View File

@ -16,6 +16,8 @@
- **9gag** - **9gag**
- **abc.net.au** - **abc.net.au**
- **Abc7News** - **Abc7News**
- **abcnews**
- **abcnews:video**
- **AcademicEarth:Course** - **AcademicEarth:Course**
- **acast** - **acast**
- **acast:channel** - **acast:channel**
@ -104,6 +106,7 @@
- **CBCPlayer** - **CBCPlayer**
- **CBS** - **CBS**
- **CBSInteractive** - **CBSInteractive**
- **CBSLocal**
- **CBSNews**: CBS News - **CBSNews**: CBS News
- **CBSNewsLiveVideo**: CBS News Live Videos - **CBSNewsLiveVideo**: CBS News Live Videos
- **CBSSports** - **CBSSports**
@ -213,6 +216,7 @@
- **Flickr** - **Flickr**
- **Folketinget**: Folketinget (ft.dk; Danish parliament) - **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FootyRoom** - **FootyRoom**
- **Formula1**
- **FOX** - **FOX**
- **Foxgay** - **Foxgay**
- **FoxNews**: Fox News and Fox Business Video - **FoxNews**: Fox News and Fox Business Video
@ -316,6 +320,7 @@
- **la7.tv** - **la7.tv**
- **Laola1Tv** - **Laola1Tv**
- **Le**: 乐视网 - **Le**: 乐视网
- **Learnr**
- **Lecture2Go** - **Lecture2Go**
- **Lemonde** - **Lemonde**
- **LePlaylist** - **LePlaylist**
@ -331,6 +336,7 @@
- **livestream** - **livestream**
- **livestream:original** - **livestream:original**
- **LnkGo** - **LnkGo**
- **LocalNews8**
- **LoveHomePorn** - **LoveHomePorn**
- **lrt.lt** - **lrt.lt**
- **lynda**: lynda.com videos - **lynda**: lynda.com videos
@ -556,6 +562,7 @@
- **ScreenJunkies** - **ScreenJunkies**
- **ScreenwaveMedia** - **ScreenwaveMedia**
- **SenateISVP** - **SenateISVP**
- **SendtoNews**
- **ServingSys** - **ServingSys**
- **Sexu** - **Sexu**
- **Shahid** - **Shahid**

View File

@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
def test_compat_etree_fromstring_doctype(self):
xml = '''<?xml version="1.0"?>
<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
compat_etree_fromstring(xml)
def test_struct_unpack(self): def test_struct_unpack(self):
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))

View File

@ -245,13 +245,20 @@ try:
except ImportError: # Python 2.6 except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error from xml.parsers.expat import ExpatError as compat_xml_parse_error
etree = xml.etree.ElementTree
class _TreeBuilder(etree.TreeBuilder):
def doctype(self, name, pubid, system):
pass
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
compat_etree_fromstring = xml.etree.ElementTree.fromstring def compat_etree_fromstring(text):
return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
else: else:
# python 2.x tries to encode unicode strings with ascii (see the # python 2.x tries to encode unicode strings with ascii (see the
# XMLParser._fixtext method) # XMLParser._fixtext method)
etree = xml.etree.ElementTree
try: try:
_etree_iter = etree.Element.iter _etree_iter = etree.Element.iter
except AttributeError: # Python <=2.6 except AttributeError: # Python <=2.6
@ -265,7 +272,7 @@ else:
# 2.7 source # 2.7 source
def _XML(text, parser=None): def _XML(text, parser=None):
if not parser: if not parser:
parser = etree.XMLParser(target=etree.TreeBuilder()) parser = etree.XMLParser(target=_TreeBuilder())
parser.feed(text) parser.feed(text)
return parser.close() return parser.close()
@ -277,7 +284,7 @@ else:
return el return el
def compat_etree_fromstring(text): def compat_etree_fromstring(text):
doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
for el in _etree_iter(doc): for el in _etree_iter(doc):
if el.text is not None and isinstance(el.text, bytes): if el.text is not None and isinstance(el.text, bytes):
el.text = el.text.decode('utf-8') el.text = el.text.decode('utf-8')

View File

@ -0,0 +1,224 @@
# coding: utf-8
from __future__ import unicode_literals
import base64
import hashlib
import json
import random
import time
from .common import InfoExtractor
from ..aes import aes_encrypt
from ..compat import compat_str
from ..utils import (
bytes_to_intlist,
determine_ext,
intlist_to_bytes,
int_or_none,
strip_jsonp,
)
def md5_text(s):
if not isinstance(s, compat_str):
s = compat_str(s)
return hashlib.md5(s.encode('utf-8')).hexdigest()
class AnvatoIE(InfoExtractor):
# Copied from anvplayer.min.js
_ANVACK_TABLE = {
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
'nbcu_nbcd_desktop_web_qa_1a6f01bdd0dc45a439043b694c8a031d': 'eSxJUbA2UUKBTXryyQ2d6NuM8oEqaPySvaPzfKNA',
'nbcu_nbcd_desktop_web_acc_eb2ff240a5d4ae9a63d4c297c32716b6c523a129': '89JR3RtUGbvKuuJIiKOMK0SoarLb5MUx8v89RcbP',
'nbcu_nbcd_watchvod_web_prod_e61107507180976724ec8e8319fe24ba5b4b60e1': 'Uc7dFt7MJ9GsBWB5T7iPvLaMSOt8BBxv4hAXk5vv',
'nbcu_nbcd_watchvod_web_qa_42afedba88a36203db5a4c09a5ba29d045302232': 'T12oDYVFP2IaFvxkmYMy5dKxswpLHtGZa4ZAXEi7',
'nbcu_nbcd_watchvod_web_acc_9193214448e2e636b0ffb78abacfd9c4f937c6ca': 'MmobcxUxMedUpohNWwXaOnMjlbiyTOBLL6d46ZpR',
'nbcu_local_monitor_web_acc_f998ad54eaf26acd8ee033eb36f39a7b791c6335': 'QvfIoPYrwsjUCcASiw3AIkVtQob2LtJHfidp9iWg',
'nbcu_cable_monitor_web_acc_a413759603e8bedfcd3c61b14767796e17834077': 'uwVPJLShvJWSs6sWEIuVem7MTF8A4IknMMzIlFto',
'nbcu_nbcd_mcpstage_web_qa_4c43a8f6e95a88dbb40276c0630ba9f693a63a4e': 'PxVYZVwjhgd5TeoPRxL3whssb5OUPnM3zyAzq8GY',
'nbcu_comcast_comcast_web_prod_074080762ad4ce956b26b43fb22abf153443a8c4': 'afnaRZfDyg1Z3WZHdupKfy6xrbAG2MHqe3VfuSwh',
'nbcu_comcast_comcast_web_qa_706103bb93ead3ef70b1de12a0e95e3c4481ade0': 'DcjsVbX9b3uoPlhdriIiovgFQZVxpISZwz0cx1ZK',
'nbcu_comcast_comcastcable_web_prod_669f04817536743563d7331c9293e59fbdbe3d07': '0RwMN2cWy10qhAhOscq3eK7aEe0wqnKt3vJ0WS4D',
'nbcu_comcast_comcastcable_web_qa_3d9d2d66219094127f0f6b09cc3c7bb076e3e1ca': '2r8G9DEya7PCqBceKZgrn2XkXgASjwLMuaFE1Aad',
'hearst_hearst_demo_web_stage_960726dfef3337059a01a78816e43b29ec04dfc7': 'cuZBPXTR6kSdoTCVXwk5KGA8rk3NrgGn4H6e9Dsp',
'anvato_mcpqa_demo_web_stage_18b55e00db5a13faa8d03ae6e41f6f5bcb15b922': 'IOaaLQ8ymqVyem14QuAvE5SndQynTcH5CrLkU2Ih',
'anvato_nextmedia_demo_web_stage_9787d56a02ff6b9f43e9a2b0920d8ca88beb5818': 'Pqu9zVzI1ApiIzbVA3VkGBEQHvdKSUuKpD6s2uaR',
'anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a': 'du1ccmn7RxzgizwbWU7hyUaGodNlJn7HtXI0WgXW',
'anvato_scripps_app_web_stage_360797e00fe2826be142155c4618cc52fce6c26c': '2PMrQ0BRoqCWl7nzphj0GouIMEh2mZYivAT0S1Su',
'fs2go_fs2go_go_all_prod_21934911ccfafc03a075894ead2260d11e2ddd24': 'RcuHlKikW2IJw6HvVoEkqq2UsuEJlbEl11pWXs4Q',
'fs2go_fs2go_go_web_prod_ead4b0eec7460c1a07783808db21b49cf1f2f9a7': '4K0HTT2u1zkQA2MaGaZmkLa1BthGSBdr7jllrhk5',
'fs2go_fs2go_go_web_stage_407585454a4400355d4391691c67f361': 'ftnc37VKRJBmHfoGGi3kT05bHyeJzilEzhKJCyl3',
'fs2go_fs2go_go_android_stage_44b714db6f8477f29afcba15a41e1d30': 'CtxpPvVpo6AbZGomYUhkKs7juHZwNml9b9J0J2gI',
'anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67': 'Pw0XX5KBDsyRnPS0R2JrSrXftsy8Jnz5pAjaYC8s',
'anvato_cbslocal_app_web_stage_547a5f096594cd3e00620c6f825cad1096d28c80': '37OBUhX2uwNyKhhrNzSSNHSRPZpApC3trdqDBpuz',
'fs2go_att_att_web_prod_1042dddd089a05438b6a08f972941176f699ffd8': 'JLcF20JwYvpv6uAGcLWIaV12jKwaL1R8us4b6Zkg',
'fs2go_att_att_web_stage_807c5001955fc114a3331fe027ddc76e': 'gbu1oO1y0JiOFh4SUipt86P288JHpyjSqolrrT1x',
'fs2go_fs2go_tudor_web_prod_a7dd8e5a7cdc830cae55eae6f3e9fee5ee49eb9b': 'ipcp87VCEZXPPe868j3orLqzc03oTy7DXsGkAXXH',
'anvato_mhz_app_web_prod_b808218b30de7fdf60340cbd9831512bc1bf6d37': 'Stlm5Gs6BEhJLRTZHcNquyzxGqr23EuFmE5DCgjX',
'fs2go_charter_charter_web_stage_c2c6e5a68375a1bf00fff213d3ff8f61a835a54c': 'Lz4hbJp1fwL6jlcz4M2PMzghM4jp4aAmybtT5dPc',
'fs2go_charter_charter_web_prod_ebfe3b10f1af215a7321cd3d629e0b81dfa6fa8c': 'vUJsK345A1bVmyYDRhZX0lqFIgVXuqhmuyp1EtPK',
'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b': 'GDKq1ixvX3MoBNdU5IOYmYa2DTUXYOozPjrCJnW7',
'anvato_epfox_app_web_stage_a3c2ce60f8f83ef374a88b68ee73a950f8ab87ce': '2jz2NH4BsXMaDsoJ5qkHMbcczAfIReo2eFYuVC1C',
'fs2go_verizon_verizon_web_stage_08e6df0354a4803f1b1f2428b5a9a382e8dbcd62': 'rKTVapNaAcmnUbGL4ZcuOoY4SE7VmZSQsblPFr7e',
'fs2go_verizon_verizon_web_prod_f909564cb606eff1f731b5e22e0928676732c445': 'qLSUuHerM3u9eNPzaHyUK52obai5MvE4XDJfqYe1',
'fs2go_foxcom_synd_web_stage_f7b9091f00ea25a4fdaaae77fca5b54cdc7e7043': '96VKF2vLd24fFiDfwPFpzM5llFN4TiIGAlodE0Re',
'fs2go_foxcom_synd_web_prod_0f2cdd64d87e4ab6a1d54aada0ff7a7c8387a064': 'agiPjbXEyEZUkbuhcnmVPhe9NNVbDjCFq2xkcx51',
'anvato_own_app_web_stage_1214ade5d28422c4dae9d03c1243aba0563c4dba': 'mzhamNac3swG4WsJAiUTacnGIODi6SWeVWk5D7ho',
'anvato_own_app_web_prod_944e162ed927ec3e9ed13eb68ed2f1008ee7565e': '9TSxh6G2TXOLBoYm9ro3LdNjjvnXpKb8UR8KoIP9',
'anvato_scripps_app_ftv_prod_a10a10468edd5afb16fb48171c03b956176afad1': 'COJ2i2UIPK7xZqIWswxe7FaVBOVgRkP1F6O6qGoH',
'anvato_scripps_app_ftv_stage_77d3ad2bdb021ec37ca2e35eb09acd396a974c9a': 'Q7nnopNLe2PPfGLOTYBqxSaRpl209IhqaEuDZi1F',
'anvato_univision_app_web_stage_551236ef07a0e17718c3995c35586b5ed8cb5031': 'D92PoLS6UitwxDRA191HUGT9OYcOjV6mPMa5wNyo',
'anvato_univision_app_web_prod_039a5c0a6009e637ae8ac906718a79911e0e65e1': '5mVS5u4SQjtw6NGw2uhMbKEIONIiLqRKck5RwQLR',
'nbcu_cnbc_springfield_ios_prod_670207fae43d6e9a94c351688851a2ce': 'M7fqCCIP9lW53oJbHs19OlJlpDrVyc2OL8gNeuTa',
'nbcu_cnbc_springfieldvod_ios_prod_7a5f04b1ceceb0e9c9e2264a44aa236e08e034c2': 'Yia6QbJahW0S7K1I0drksimhZb4UFq92xLBmmMvk',
'anvato_cox_app_web_prod_ce45cda237969f93e7130f50ee8bb6280c1484ab': 'cc0miZexpFtdoqZGvdhfXsLy7FXjRAOgb9V0f5fZ',
'anvato_cox_app_web_stage_c23dbe016a8e9d8c7101d10172b92434f6088bf9': 'yivU3MYHd2eDZcOfmLbINVtqxyecKTOp8OjOuoGJ',
'anvato_chnzero_app_web_stage_b1164d1352b579e792e542fddf13ee34c0eeb46b': 'A76QkXMmVH8lTCfU15xva1mZnSVcqeY4Xb22Kp7m',
'anvato_chnzero_app_web_prod_253d358928dc08ec161eda2389d53707288a730c': 'OA5QI3ZWZZkdtUEDqh28AH8GedsF6FqzJI32596b',
'anvato_discovery_vodpoc_web_stage_9fa7077b5e8af1f8355f65d4fb8d2e0e9d54e2b7': 'q3oT191tTQ5g3JCP67PkjLASI9s16DuWZ6fYmry3',
'anvato_discovery_vodpoc_web_prod_688614983167a1af6cdf6d76343fda10a65223c1': 'qRvRQCTVHd0VVOHsMvvfidyWmlYVrTbjby7WqIuK',
'nbcu_cnbc_springfieldvod_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
'nbcu_cnbc_springfield_ftv_stage_826040aad1925a46ac5dfb4b3c5143e648c6a30d': 'JQaSb5a8Tz0PT4ti329DNmzDO30TnngTHmvX8Vua',
'nbcu_nbcd_capture_web_stage_4dd9d585bfb984ebf856dee35db027b2465cc4ae': '0j1Ov4Vopyi2HpBZJYdL2m8ERJVGYh3nNpzPiO8F',
'nbcu_nbcd_watch3_android_prod_7712ca5fcf1c22f19ec1870a9650f9c37db22dcf': '3LN2UB3rPUAMu7ZriWkHky9vpLMXYha8JbSnxBlx',
'nbcu_nbcd_watchvod3_android_prod_0910a3a4692d57c0b5ff4316075bc5d096be45b9': 'mJagcQ2II30vUOAauOXne7ERwbf5S9nlB3IP17lQ',
'anvato_scripps_app_atv_prod_790deda22e16e71e83df58f880cd389908a45d52': 'CB6trI1mpoDIM5o54DNTsji90NDBQPZ4z4RqBNSH',
'nbcu_nbcd_watchv4_android_prod_ff67cef9cb409158c6f8c3533edddadd0b750507': 'j8CHQCUWjlYERj4NFRmUYOND85QNbHViH09UwuKm',
'nbcu_nbcd_watchvodv4_android_prod_a814d781609989dea6a629d50ae4c7ad8cc8e907': 'rkVnUXxdA9rawVLUlDQtMue9Y4Q7lFEaIotcUhjt',
'rvVKpA50qlOPLFxMjrCGf5pdkdQDm7qn': '1J7ZkY5Qz5lMLi93QOH9IveE7EYB3rLl',
'nbcu_dtv_local_web_prod_b266cf49defe255fd4426a97e27c09e513e9f82f': 'HuLnJDqzLa4saCzYMJ79zDRSQpEduw1TzjMNQu2b',
'nbcu_att_local_web_prod_4cef038b2d969a6b7d700a56a599040b6a619f67': 'Q0Em5VDc2KpydUrVwzWRXAwoNBulWUxCq2faK0AV',
'nbcu_dish_local_web_prod_c56dcaf2da2e9157a4266c82a78195f1dd570f6b': 'bC1LWmRz9ayj2AlzizeJ1HuhTfIaJGsDBnZNgoRg',
'nbcu_verizon_local_web_prod_88bebd2ce006d4ed980de8133496f9a74cb9b3e1': 'wzhDKJZpgvUSS1EQvpCQP8Q59qVzcPixqDGJefSk',
'nbcu_charter_local_web_prod_9ad90f7fc4023643bb718f0fe0fd5beea2382a50': 'PyNbxNhEWLzy1ZvWEQelRuIQY88Eub7xbSVRMdfT',
'nbcu_suddenlink_local_web_prod_20fb711725cac224baa1c1cb0b1c324d25e97178': '0Rph41lPXZbb3fqeXtHjjbxfSrNbtZp1Ygq7Jypa',
'nbcu_wow_local_web_prod_652d9ce4f552d9c2e7b5b1ed37b8cb48155174ad': 'qayIBZ70w1dItm2zS42AptXnxW15mkjRrwnBjMPv',
'nbcu_centurylink_local_web_prod_2034402b029bf3e837ad46814d9e4b1d1345ccd5': 'StePcPMkjsX51PcizLdLRMzxMEl5k2FlsMLUNV4k',
'nbcu_atlanticbrd_local_web_prod_8d5f5ecbf7f7b2f5e6d908dd75d90ae3565f682e': 'NtYLb4TFUS0pRs3XTkyO5sbVGYjVf17bVbjaGscI',
'nbcu_nbcd_watchvod_web_dev_08bc05699be47c4f31d5080263a8cfadc16d0f7c': 'hwxi2dgDoSWgfmVVXOYZm14uuvku4QfopstXckhr',
'anvato_nextmedia_app_web_prod_a4fa8c7204aa65e71044b57aaf63711980cfe5a0': 'tQN1oGPYY1nM85rJYePWGcIb92TG0gSqoVpQTWOw',
'anvato_mcp_lin_web_prod_4c36fbfd4d8d8ecae6488656e21ac6d1ac972749': 'GUXNf5ZDX2jFUpu4WT2Go4DJ5nhUCzpnwDRRUx1K',
'anvato_mcp_univision_web_prod_37fe34850c99a3b5cdb71dab10a417dd5cdecafa': 'bLDYF8JqfG42b7bwKEgQiU9E2LTIAtnKzSgYpFUH',
'anvato_mcp_fs2go_web_prod_c7b90a93e171469cdca00a931211a2f556370d0a': 'icgGoYGipQMMSEvhplZX1pwbN69srwKYWksz3xWK',
'anvato_mcp_sps_web_prod_54bdc90dd6ba21710e9f7074338365bba28da336': 'fA2iQdI7RDpynqzQYIpXALVS83NTPr8LLFK4LFsu',
'anvato_mcp_anv_web_prod_791407490f4c1ef2a4bcb21103e0cb1bcb3352b3': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
'anvato_mcp_gray_web_prod_4c10f067c393ed8fc453d3930f8ab2b159973900': 'rMOUZqe9lwcGq2mNgG3EDusm6lKgsUnczoOX3mbg',
'anvato_mcp_hearst_web_prod_5356c3de0fc7c90a3727b4863ca7fec3a4524a99': 'P3uXJ0fXXditBPCGkfvlnVScpPEfKmc64Zv7ZgbK',
'anvato_mcp_cbs_web_prod_02f26581ff80e5bda7aad28226a8d369037f2cbe': 'mGPvo5ZA5SgjOFAPEPXv7AnOpFUICX8hvFQVz69n',
'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582': 'qyT6PXXLjVNCrHaRVj0ugAhalNRS7Ee9BP7LUokD',
'nbcu_nbcd_watchvodv4_web_stage_4108362fba2d4ede21f262fea3c4162cbafd66c7': 'DhaU5lj0W2gEdcSSsnxURq8t7KIWtJfD966crVDk',
'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
}
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
def __init__(self, *args, **kwargs):
super(AnvatoIE, self).__init__(*args, **kwargs)
self.__server_time = None
def _server_time(self, access_key, video_id):
if self.__server_time is not None:
return self.__server_time
self.__server_time = int(self._download_json(
self._api_prefix(access_key) + 'server_time?anvack=' + access_key, video_id,
note='Fetching server time')['server_time'])
return self.__server_time
def _api_prefix(self, access_key):
return 'https://tkx2-%s.anvato.net/rest/v2/' % ('prod' if 'prod' in access_key else 'stage')
def _get_video_json(self, access_key, video_id):
# See et() in anvplayer.min.js, which is an alias of getVideoJSON()
video_data_url = self._api_prefix(access_key) + 'mcp/video/%s?anvack=%s' % (video_id, access_key)
server_time = self._server_time(access_key, video_id)
input_data = '%d~%s~%s' % (server_time, md5_text(video_data_url), md5_text(server_time))
auth_secret = intlist_to_bytes(aes_encrypt(
bytes_to_intlist(input_data[:64]), bytes_to_intlist(self._AUTH_KEY)))
video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
payload = {
'api': {
'anvrid': anvrid,
'anvstk': md5_text('%s|%s|%d|%s' % (
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
'anvts': server_time,
},
}
return self._download_json(
video_data_url, video_id, transform_source=strip_jsonp,
data=json.dumps(payload).encode('utf-8'))
def _extract_anvato_videos(self, webpage, video_id):
anvplayer_data = self._parse_json(self._html_search_regex(
r'<script[^>]+data-anvp=\'([^\']+)\'', webpage,
'Anvato player data'), video_id)
video_id = anvplayer_data['video']
access_key = anvplayer_data['accessKey']
video_data = self._get_video_json(access_key, video_id)
formats = []
for published_url in video_data['published_urls']:
video_url = published_url['embed_url']
ext = determine_ext(video_url)
if ext == 'smil':
formats.extend(self._extract_smil_formats(video_url, video_id))
continue
tbr = int_or_none(published_url.get('kbps'))
a_format = {
'url': video_url,
'format_id': ('-'.join(filter(None, ['http', published_url.get('cdn_name')]))).lower(),
'tbr': tbr if tbr != 0 else None,
}
if ext == 'm3u8':
# Not using _extract_m3u8_formats here as individual media
# playlists are also included in published_urls.
if tbr is None:
formats.append(self._m3u8_meta_format(video_url, ext='mp4', m3u8_id='hls'))
continue
else:
a_format.update({
'format_id': '-'.join(filter(None, ['hls', compat_str(tbr)])),
'ext': 'mp4',
})
elif ext == 'mp3':
a_format['vcodec'] = 'none'
else:
a_format.update({
'width': int_or_none(published_url.get('width')),
'height': int_or_none(published_url.get('height')),
})
formats.append(a_format)
self._sort_formats(formats)
subtitles = {}
for caption in video_data.get('captions', []):
a_caption = {
'url': caption['url'],
'ext': 'tt' if caption.get('format') == 'SMPTE-TT' else None
}
subtitles.setdefault(caption['language'], []).append(a_caption)
return {
'id': video_id,
'formats': formats,
'title': video_data.get('def_title'),
'description': video_data.get('def_description'),
'categories': video_data.get('categories'),
'thumbnail': video_data.get('thumbnail'),
'subtitles': subtitles,
}

View File

@ -29,7 +29,7 @@ class BandcampIE(InfoExtractor):
'_skip': 'There is a limit of 200 free downloads / month for the test song' '_skip': 'There is a limit of 200 free downloads / month for the test song'
}, { }, {
'url': 'http://benprunty.bandcamp.com/track/lanius-battle', 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
'md5': '2b68e5851514c20efdff2afc5603b8b4', 'md5': '73d0b3171568232574e45652f8720b5c',
'info_dict': { 'info_dict': {
'id': '2650410135', 'id': '2650410135',
'ext': 'mp3', 'ext': 'mp3',
@ -48,6 +48,10 @@ class BandcampIE(InfoExtractor):
if m_trackinfo: if m_trackinfo:
json_code = m_trackinfo.group(1) json_code = m_trackinfo.group(1)
data = json.loads(json_code)[0] data = json.loads(json_code)[0]
track_id = compat_str(data['id'])
if not data.get('file'):
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
formats = [] formats = []
for format_id, format_url in data['file'].items(): for format_id, format_url in data['file'].items():
@ -64,7 +68,7 @@ class BandcampIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': compat_str(data['id']), 'id': track_id,
'title': data['title'], 'title': data['title'],
'formats': formats, 'formats': formats,
'duration': float_or_none(data.get('duration')), 'duration': float_or_none(data.get('duration')),

View File

@ -444,6 +444,10 @@ class BrightcoveNewIE(InfoExtractor):
# non numeric ref: prefixed video id # non numeric ref: prefixed video id
'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356', 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
'only_matching': True, 'only_matching': True,
}, {
# unavailable video without message but with error_code
'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
'only_matching': True,
}] }]
@staticmethod @staticmethod
@ -514,8 +518,9 @@ class BrightcoveNewIE(InfoExtractor):
}) })
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
json_data = self._parse_json(e.cause.read().decode(), video_id) json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
raise ExtractorError(json_data[0]['message'], expected=True) raise ExtractorError(
json_data.get('message') or json_data['error_code'], expected=True)
raise raise
title = json_data['name'].strip() title = json_data['name'].strip()

View File

@ -11,6 +11,7 @@ class BYUtvIE(InfoExtractor):
_VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)' _VALID_URL = r'^https?://(?:www\.)?byutv.org/watch/[0-9a-f-]+/(?P<video_id>[^/?#]+)'
_TEST = { _TEST = {
'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5',
'md5': '05850eb8c749e2ee05ad5a1c34668493',
'info_dict': { 'info_dict': {
'id': 'studio-c-season-5-episode-5', 'id': 'studio-c-season-5-episode-5',
'ext': 'mp4', 'ext': 'mp4',
@ -21,7 +22,8 @@ class BYUtvIE(InfoExtractor):
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} },
'add_ie': ['Ooyala'],
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -11,7 +11,7 @@ from ..utils import (
class CBCIE(InfoExtractor): class CBCIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)' _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
# with mediaId # with mediaId
'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
@ -28,6 +28,7 @@ class CBCIE(InfoExtractor):
}, { }, {
# with clipId # with clipId
'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
'md5': '0274a90b51a9b4971fe005c63f592f12',
'info_dict': { 'info_dict': {
'id': '2487345465', 'id': '2487345465',
'ext': 'mp4', 'ext': 'mp4',
@ -91,8 +92,9 @@ class CBCIE(InfoExtractor):
class CBCPlayerIE(InfoExtractor): class CBCPlayerIE(InfoExtractor):
_VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)' _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
_TEST = { _TESTS = [{
'url': 'http://www.cbc.ca/player/play/2683190193', 'url': 'http://www.cbc.ca/player/play/2683190193',
'md5': '64d25f841ddf4ddb28a235338af32e2c',
'info_dict': { 'info_dict': {
'id': '2683190193', 'id': '2683190193',
'ext': 'mp4', 'ext': 'mp4',
@ -102,7 +104,33 @@ class CBCPlayerIE(InfoExtractor):
'upload_date': '20160210', 'upload_date': '20160210',
'uploader': 'CBCC-NEW', 'uploader': 'CBCC-NEW',
}, },
} }, {
# Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
'url': 'http://www.cbc.ca/player/play/2657631896',
'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
'info_dict': {
'id': '2657631896',
'ext': 'mp3',
'title': 'CBC Montreal is organizing its first ever community hackathon!',
'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
'timestamp': 1425704400,
'upload_date': '20150307',
'uploader': 'CBCC-NEW',
},
}, {
# available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
'url': 'http://www.cbc.ca/player/play/2164402062',
'md5': '17a61eb813539abea40618d6323a7f82',
'info_dict': {
'id': '2164402062',
'ext': 'flv',
'title': 'Cancer survivor four times over',
'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
'timestamp': 1320410746,
'upload_date': '20111104',
'uploader': 'CBCC-NEW',
},
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
@ -110,7 +138,7 @@ class CBCPlayerIE(InfoExtractor):
'_type': 'url_transparent', '_type': 'url_transparent',
'ie_key': 'ThePlatform', 'ie_key': 'ThePlatform',
'url': smuggle_url( 'url': smuggle_url(
'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true' % video_id, { 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
'force_smil_url': True 'force_smil_url': True
}), }),
'id': video_id, 'id': video_id,

View File

@ -1,5 +1,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from ..utils import ( from ..utils import (
xpath_text, xpath_text,
@ -21,7 +23,7 @@ class CBSBaseIE(ThePlatformIE):
class CBSIE(CBSBaseIE): class CBSIE(CBSBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<id>[^/]+)' _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))'
_TESTS = [{ _TESTS = [{
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', 'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@ -66,11 +68,12 @@ class CBSIE(CBSBaseIE):
TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true'
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) content_id, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) if not content_id:
content_id = self._search_regex( webpage = self._download_webpage(url, display_id)
[r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], content_id = self._search_regex(
webpage, 'content id') [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"],
webpage, 'content id')
items_data = self._download_xml( items_data = self._download_xml(
'http://can.cbs.com/thunder/player/videoPlayerService.php', 'http://can.cbs.com/thunder/player/videoPlayerService.php',
content_id, query={'partner': 'cbs', 'contentId': content_id}) content_id, query={'partner': 'cbs', 'contentId': content_id})

View File

@ -0,0 +1,84 @@
# coding: utf-8
from __future__ import unicode_literals
import calendar
import datetime
from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse
class CBSLocalIE(AnvatoIE):
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
_TESTS = [{
# Anvato backend
'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
'md5': 'f0ee3081e3843f575fccef901199b212',
'info_dict': {
'id': '3401037',
'ext': 'mp4',
'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
'thumbnail': 're:^https?://.*',
'timestamp': 1463440500,
'upload_date': '20160516',
'subtitles': {
'en': 'mincount:5',
},
'categories': [
'Stations\\Spoken Word\\KCBSTV',
'Syndication\\MSN',
'Syndication\\NDN',
'Syndication\\AOL',
'Syndication\\Yahoo',
'Syndication\\Tribune',
'Syndication\\Curb.tv',
'Content\\News'
],
},
}, {
# SendtoNews embed
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588',
'ext': 'mp4',
'title': 'Recap: CLE 15, CIN 6',
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
'upload_date': '20160516',
'timestamp': 1463433840,
'duration': 49,
},
'params': {
# m3u8 download
'skip_download': True,
},
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
sendtonews_url = SendtoNewsIE._extract_url(webpage)
if sendtonews_url:
info_dict = {
'_type': 'url_transparent',
'url': compat_urlparse.urljoin(url, sendtonews_url),
}
else:
info_dict = self._extract_anvato_videos(webpage, display_id)
time_str = self._html_search_regex(
r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
timestamp = None
if time_str:
timestamp = calendar.timegm(datetime.datetime.strptime(
time_str, '%b %d, %Y %I:%M %p').timetuple())
info_dict.update({
'display_id': display_id,
'timestamp': timestamp,
})
return info_dict

View File

@ -44,10 +44,10 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
# or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524 # or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
_VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow) _VALID_URL = r'''(?x)^(:(?P<shortname>tds|thedailyshow)
|https?://(:www\.)? |https?://(:www\.)?
(?P<showname>thedailyshow|thecolbertreport)\.(?:cc\.)?com/ (?P<showname>thedailyshow|thecolbertreport|tosh)\.(?:cc\.)?com/
((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)| ((?:full-)?episodes/(?:[0-9a-z]{6}/)?(?P<episode>.*)|
(?P<clip> (?P<clip>
(?:(?:guests/[^/]+|videos|video-playlists|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+)) (?:(?:guests/[^/]+|videos|video-(?:clips|playlists)|special-editions|news-team/[^/]+)/[^/]+/(?P<videotitle>[^/?#]+))
|(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
|(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))
)| )|
@ -129,6 +129,9 @@ class ComedyCentralShowsIE(MTVServicesInfoExtractor):
}, { }, {
'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel', 'url': 'http://thedailyshow.cc.com/news-team/michael-che/7wnfel/we-need-to-talk-about-israel',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
'only_matching': True,
}] }]
_available_formats = ['3500', '2200', '1700', '1200', '750', '400'] _available_formats = ['3500', '2200', '1700', '1200', '750', '400']

View File

@ -1058,12 +1058,8 @@ class InfoExtractor(object):
}) })
return formats return formats
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
entry_protocol='m3u8', preference=None, return {
m3u8_id=None, note=None, errnote=None,
fatal=True, live=False):
formats = [{
'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])), 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
'url': m3u8_url, 'url': m3u8_url,
'ext': ext, 'ext': ext,
@ -1071,7 +1067,14 @@ class InfoExtractor(object):
'preference': preference - 1 if preference else -1, 'preference': preference - 1 if preference else -1,
'resolution': 'multiple', 'resolution': 'multiple',
'format_note': 'Quality selection URL', 'format_note': 'Quality selection URL',
}] }
def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
entry_protocol='m3u8', preference=None,
m3u8_id=None, note=None, errnote=None,
fatal=True, live=False):
formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
format_url = lambda u: ( format_url = lambda u: (
u u
@ -1138,7 +1141,7 @@ class InfoExtractor(object):
format_id = [] format_id = []
if m3u8_id: if m3u8_id:
format_id.append(m3u8_id) format_id.append(m3u8_id)
last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') != 'SUBTITLES' else None last_media_name = last_media.get('NAME') if last_media and last_media.get('TYPE') not in ('SUBTITLES', 'CLOSED-CAPTIONS') else None
# Despite specification does not mention NAME attribute for # Despite specification does not mention NAME attribute for
# EXT-X-STREAM-INF it still sometimes may be present # EXT-X-STREAM-INF it still sometimes may be present
stream_name = last_info.get('NAME') or last_media_name stream_name = last_info.get('NAME') or last_media_name
@ -1278,21 +1281,21 @@ class InfoExtractor(object):
m3u8_count = 0 m3u8_count = 0
srcs = [] srcs = []
videos = smil.findall(self._xpath_ns('.//video', namespace)) media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
for video in videos: for medium in media:
src = video.get('src') src = medium.get('src')
if not src or src in srcs: if not src or src in srcs:
continue continue
srcs.append(src) srcs.append(src)
bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
filesize = int_or_none(video.get('size') or video.get('fileSize')) filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
width = int_or_none(video.get('width')) width = int_or_none(medium.get('width'))
height = int_or_none(video.get('height')) height = int_or_none(medium.get('height'))
proto = video.get('proto') proto = medium.get('proto')
ext = video.get('ext') ext = medium.get('ext')
src_ext = determine_ext(src) src_ext = determine_ext(src)
streamer = video.get('streamer') or base streamer = medium.get('streamer') or base
if proto == 'rtmp' or streamer.startswith('rtmp'): if proto == 'rtmp' or streamer.startswith('rtmp'):
rtmp_count += 1 rtmp_count += 1

View File

@ -11,8 +11,8 @@ from ..utils import (
class EpornerIE(InfoExtractor): class EpornerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<display_id>[\w-]+)' _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\w+)/(?P<display_id>[\w-]+)'
_TEST = { _TESTS = [{
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '39d486f046212d8e1b911c52ab4691f8', 'md5': '39d486f046212d8e1b911c52ab4691f8',
'info_dict': { 'info_dict': {
@ -23,8 +23,22 @@ class EpornerIE(InfoExtractor):
'duration': 1838, 'duration': 1838,
'view_count': int, 'view_count': int,
'age_limit': 18, 'age_limit': 18,
} },
} },
# New (May 2016) URL layout
{
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
'md5': '3469eeaa93b6967a34cdbdbb9d064b33',
'info_dict': {
'id': '3YRUtzMcWn0',
'display_id': 'Star-Wars-XXX-Parody',
'ext': 'mp4',
'title': 'Star Wars XXX Parody',
'duration': 361.0,
'view_count': int,
'age_limit': 18,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)

View File

@ -8,6 +8,7 @@ class ESPNIE(InfoExtractor):
_VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)' _VALID_URL = r'https?://espn\.go\.com/(?:[^/]+/)*(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://espn.go.com/video/clip?id=10365079', 'url': 'http://espn.go.com/video/clip?id=10365079',
'md5': '60e5d097a523e767d06479335d1bdc58',
'info_dict': { 'info_dict': {
'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
'ext': 'mp4', 'ext': 'mp4',
@ -15,21 +16,22 @@ class ESPNIE(InfoExtractor):
'description': None, 'description': None,
}, },
'params': { 'params': {
# m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['OoyalaExternal'],
}, { }, {
# intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
'url': 'http://espn.go.com/video/clip?id=2743663', 'url': 'http://espn.go.com/video/clip?id=2743663',
'md5': 'f4ac89b59afc7e2d7dbb049523df6768',
'info_dict': { 'info_dict': {
'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', 'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Must-See Moments: Best of the MLS season', 'title': 'Must-See Moments: Best of the MLS season',
}, },
'params': { 'params': {
# m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['OoyalaExternal'],
}, { }, {
'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079', 'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
'only_matching': True, 'only_matching': True,

View File

@ -111,6 +111,7 @@ from .cbc import (
CBCPlayerIE, CBCPlayerIE,
) )
from .cbs import CBSIE from .cbs import CBSIE
from .cbslocal import CBSLocalIE
from .cbsinteractive import CBSInteractiveIE from .cbsinteractive import CBSInteractiveIE
from .cbsnews import ( from .cbsnews import (
CBSNewsIE, CBSNewsIE,
@ -230,6 +231,7 @@ from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE from .exfm import ExfmIE
from .expotv import ExpoTVIE from .expotv import ExpoTVIE
from .extremetube import ExtremeTubeIE from .extremetube import ExtremeTubeIE
from .eyedotv import EyedoTVIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .faz import FazIE from .faz import FazIE
from .fc2 import FC2IE from .fc2 import FC2IE
@ -396,6 +398,7 @@ from .livestream import (
LivestreamShortenerIE, LivestreamShortenerIE,
) )
from .lnkgo import LnkGoIE from .lnkgo import LnkGoIE
from .localnews8 import LocalNews8IE
from .lovehomeporn import LoveHomePornIE from .lovehomeporn import LoveHomePornIE
from .lrt import LRTIE from .lrt import LRTIE
from .lynda import ( from .lynda import (
@ -615,6 +618,10 @@ from .qqmusic import (
QQMusicPlaylistIE, QQMusicPlaylistIE,
) )
from .r7 import R7IE from .r7 import R7IE
from .radiocanada import (
RadioCanadaIE,
RadioCanadaAudioVideoIE,
)
from .radiode import RadioDeIE from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE from .radiobremen import RadioBremenIE
@ -628,6 +635,7 @@ from .rds import RDSIE
from .redtube import RedTubeIE from .redtube import RedTubeIE
from .regiotv import RegioTVIE from .regiotv import RegioTVIE
from .restudy import RestudyIE from .restudy import RestudyIE
from .reuters import ReutersIE
from .reverbnation import ReverbNationIE from .reverbnation import ReverbNationIE
from .revision3 import Revision3IE from .revision3 import Revision3IE
from .rice import RICEIE from .rice import RICEIE
@ -669,6 +677,7 @@ from .screencastomatic import ScreencastOMaticIE
from .screenjunkies import ScreenJunkiesIE from .screenjunkies import ScreenJunkiesIE
from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE from .screenwavemedia import ScreenwaveMediaIE, TeamFourIE
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE
from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE from .sexu import SexuIE
from .shahid import ShahidIE from .shahid import ShahidIE
@ -938,7 +947,10 @@ from .vube import VubeIE
from .vuclip import VuClipIE from .vuclip import VuClipIE
from .vulture import VultureIE from .vulture import VultureIE
from .walla import WallaIE from .walla import WallaIE
from .washingtonpost import WashingtonPostIE from .washingtonpost import (
WashingtonPostIE,
WashingtonPostArticleIE,
)
from .wat import WatIE from .wat import WatIE
from .watchindianporn import WatchIndianPornIE from .watchindianporn import WatchIndianPornIE
from .wdr import ( from .wdr import (

View File

@ -0,0 +1,64 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
xpath_text,
parse_duration,
ExtractorError,
)
class EyedoTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eyedo\.tv/[^/]+/(?:#!/)?Live/Detail/(?P<id>[0-9]+)'
_TEST = {
'url': 'https://www.eyedo.tv/en-US/#!/Live/Detail/16301',
'md5': 'ba14f17995cdfc20c36ba40e21bf73f7',
'info_dict': {
'id': '16301',
'ext': 'mp4',
'title': 'Journée du conseil scientifique de l\'Afnic 2015',
'description': 'md5:4abe07293b2f73efc6e1c37028d58c98',
'uploader': 'Afnic Live',
'uploader_id': '8023',
}
}
_ROOT_URL = 'http://live.eyedo.net:1935/'
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_xml('http://eyedo.tv/api/live/GetLive/%s' % video_id, video_id)
def _add_ns(path):
return self._xpath_ns(path, 'http://schemas.datacontract.org/2004/07/EyeDo.Core.Implementation.Web.ViewModels.Api')
title = xpath_text(video_data, _add_ns('Titre'), 'title', True)
state_live_code = xpath_text(video_data, _add_ns('StateLiveCode'), 'title', True)
if state_live_code == 'avenir':
raise ExtractorError(
'%s said: We\'re sorry, but this video is not yet available.' % self.IE_NAME,
expected=True)
is_live = state_live_code == 'live'
m3u8_url = None
# http://eyedo.tv/Content/Html5/Scripts/html5view.js
if is_live:
if xpath_text(video_data, 'Cdn') == 'true':
m3u8_url = 'http://rrr.sz.xlcdn.com/?account=eyedo&file=A%s&type=live&service=wowza&protocol=http&output=playlist.m3u8' % video_id
else:
m3u8_url = self._ROOT_URL + 'w/%s/eyedo_720p/playlist.m3u8' % video_id
else:
m3u8_url = self._ROOT_URL + 'replay-w/%s/mp4:%s.mp4/playlist.m3u8' % (video_id, video_id)
return {
'id': video_id,
'title': title,
'formats': self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8' if is_live else 'm3u8_native'),
'description': xpath_text(video_data, _add_ns('Description')),
'duration': parse_duration(xpath_text(video_data, _add_ns('Duration'))),
'uploader': xpath_text(video_data, _add_ns('Createur')),
'uploader_id': xpath_text(video_data, _add_ns('CreateurId')),
'chapter': xpath_text(video_data, _add_ns('ChapitreTitre')),
'chapter_id': xpath_text(video_data, _add_ns('ChapitreId')),
}

View File

@ -13,7 +13,8 @@ class Formula1IE(InfoExtractor):
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV', 'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
'ext': 'flv', 'ext': 'flv',
'title': 'Race highlights - Spain 2016', 'title': 'Race highlights - Spain 2016',
} },
'add_ie': ['Ooyala'],
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -62,6 +62,7 @@ from .digiteka import DigitekaIE
from .instagram import InstagramIE from .instagram import InstagramIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -717,15 +718,18 @@ class GenericIE(InfoExtractor):
}, },
# Wistia embed # Wistia embed
{ {
'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
'md5': '8788b683c777a5cf25621eaf286d0c23', 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
'info_dict': { 'info_dict': {
'id': '1cfaf6b7ea', 'id': '6e2wtrbdaf',
'ext': 'mov', 'ext': 'mov',
'title': 'md5:51364a8d3d009997ba99656004b5e20d', 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
'duration': 643.0, 'description': 'a Paywall Videos video from Remilon',
'filesize': 182808282, 'duration': 644.072,
'uploader': 'education-portal.com', 'uploader': 'study.com',
'timestamp': 1459678540,
'upload_date': '20160403',
'filesize': 24687186,
}, },
}, },
{ {
@ -734,14 +738,30 @@ class GenericIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'uxjb0lwrcz', 'id': 'uxjb0lwrcz',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', 'title': 'Conversation about Hexagonal Rails Part 1',
'description': 'a Martin Fowler video from ThoughtWorks', 'description': 'a Martin Fowler video from ThoughtWorks',
'duration': 1715.0, 'duration': 1715.0,
'uploader': 'thoughtworks.wistia.com', 'uploader': 'thoughtworks.wistia.com',
'upload_date': '20140603',
'timestamp': 1401832161, 'timestamp': 1401832161,
'upload_date': '20140603',
}, },
}, },
# Wistia standard embed (async)
{
'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
'info_dict': {
'id': '807fafadvk',
'ext': 'mp4',
'title': 'Drip Brennan Dunn Workshop',
'description': 'a JV Webinars video from getdrip-1',
'duration': 4986.95,
'timestamp': 1463607249,
'upload_date': '20160518',
},
'params': {
'skip_download': True,
}
},
# Soundcloud embed # Soundcloud embed
{ {
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
@ -764,6 +784,19 @@ class GenericIE(InfoExtractor):
'title': 'Rosetta #CometLanding webcast HL 10', 'title': 'Rosetta #CometLanding webcast HL 10',
} }
}, },
# Another Livestream embed, without 'new.' in URL
{
'url': 'https://www.freespeech.org/',
'info_dict': {
'id': '123537347',
'ext': 'mp4',
'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
},
'params': {
# Live stream
'skip_download': True,
},
},
# LazyYT # LazyYT
{ {
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
@ -1174,6 +1207,16 @@ class GenericIE(InfoExtractor):
'uploader': 'Lake8737', 'uploader': 'Lake8737',
} }
}, },
# Duplicated embedded video URLs
{
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
'info_dict': {
'id': '149298443_480_16c25b74_2',
'ext': 'mp4',
'title': 'vs. Blue Orange Spring Game',
'uploader': 'www.hudl.com',
},
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -1480,6 +1523,11 @@ class GenericIE(InfoExtractor):
if bc_urls: if bc_urls:
return _playlist_from_matches(bc_urls, ie='BrightcoveNew') return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
# Look for ThePlatform embeds
tp_urls = ThePlatformIE._extract_urls(webpage)
if tp_urls:
return _playlist_from_matches(tp_urls, ie='ThePlatform')
# Look for embedded rtl.nl player # Look for embedded rtl.nl player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@ -1548,21 +1596,26 @@ class GenericIE(InfoExtractor):
'url': embed_url, 'url': embed_url,
'ie_key': 'Wistia', 'ie_key': 'Wistia',
'uploader': video_uploader, 'uploader': video_uploader,
'title': video_title,
'id': video_id,
} }
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage) match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match: if match:
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')), 'url': 'wistia:%s' % match.group('id'),
'ie_key': 'Wistia', 'ie_key': 'Wistia',
'uploader': video_uploader, 'uploader': video_uploader,
'title': video_title,
'id': match.group('id')
} }
match = re.search(
r'''(?sx)
<script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
<div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
''', webpage)
if match:
return self.url_result(self._proto_relative_url(
'wistia:%s' % match.group('id')), 'Wistia')
# Look for SVT player # Look for SVT player
svt_url = SVTIE._extract_url(webpage) svt_url = SVTIE._extract_url(webpage)
if svt_url: if svt_url:
@ -1838,7 +1891,7 @@ class GenericIE(InfoExtractor):
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
mobj = re.search( mobj = re.search(
r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"', r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
webpage) webpage)
if mobj is not None: if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream') return self.url_result(mobj.group('url'), 'Livestream')
@ -2081,7 +2134,7 @@ class GenericIE(InfoExtractor):
raise UnsupportedError(url) raise UnsupportedError(url)
entries = [] entries = []
for video_url in found: for video_url in orderedSet(found):
video_url = unescapeHTML(video_url) video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/') video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url) video_url = compat_urlparse.urljoin(url, video_url)

View File

@ -14,6 +14,7 @@ class GrouponIE(InfoExtractor):
'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors', 'description': 'Studio kept at 105 degrees and 40% humidity with anti-microbial and anti-slip Flotex flooring; certified instructors',
}, },
'playlist': [{ 'playlist': [{
'md5': '42428ce8a00585f9bc36e49226eae7a1',
'info_dict': { 'info_dict': {
'id': 'fk6OhWpXgIQ', 'id': 'fk6OhWpXgIQ',
'ext': 'mp4', 'ext': 'mp4',
@ -24,10 +25,11 @@ class GrouponIE(InfoExtractor):
'uploader_id': 'groupon', 'uploader_id': 'groupon',
'uploader': 'Groupon', 'uploader': 'Groupon',
}, },
'add_ie': ['Youtube'],
}], }],
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} },
} }
_PROVIDERS = { _PROVIDERS = {

View File

@ -8,7 +8,7 @@ class HowcastIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
_TEST = { _TEST = {
'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly', 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
'md5': '8b743df908c42f60cf6496586c7f12c3', 'md5': '7d45932269a288149483144f01b99789',
'info_dict': { 'info_dict': {
'id': '390161', 'id': '390161',
'ext': 'mp4', 'ext': 'mp4',
@ -19,9 +19,9 @@ class HowcastIE(InfoExtractor):
'duration': 56.823, 'duration': 56.823,
}, },
'params': { 'params': {
# m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['Ooyala'],
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -5,33 +5,50 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext,
float_or_none, float_or_none,
int_or_none, int_or_none,
) )
class JWPlatformBaseIE(InfoExtractor): class JWPlatformBaseIE(InfoExtractor):
def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True): def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None):
video_data = jwplayer_data['playlist'][0] video_data = jwplayer_data['playlist'][0]
formats = [] formats = []
for source in video_data['sources']: for source in video_data['sources']:
source_url = self._proto_relative_url(source['file']) source_url = self._proto_relative_url(source['file'])
source_type = source.get('type') or '' source_type = source.get('type') or ''
if source_type in ('application/vnd.apple.mpegurl', 'hls'): if source_type in ('application/vnd.apple.mpegurl', 'hls') or determine_ext(source_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
source_url, video_id, 'mp4', 'm3u8_native', fatal=False)) source_url, video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
elif source_type.startswith('audio'): elif source_type.startswith('audio'):
formats.append({ formats.append({
'url': source_url, 'url': source_url,
'vcodec': 'none', 'vcodec': 'none',
}) })
else: else:
formats.append({ a_format = {
'url': source_url, 'url': source_url,
'width': int_or_none(source.get('width')), 'width': int_or_none(source.get('width')),
'height': int_or_none(source.get('height')), 'height': int_or_none(source.get('height')),
}) }
if source_url.startswith('rtmp'):
a_format['ext'] = 'flv',
# See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
# of jwplayer.flash.swf
rtmp_url_parts = re.split(
r'((?:mp4|mp3|flv):)', source_url, 1)
if len(rtmp_url_parts) == 3:
rtmp_url, prefix, play_path = rtmp_url_parts
a_format.update({
'url': rtmp_url,
'play_path': prefix + play_path,
})
if rtmp_params:
a_format.update(rtmp_params)
formats.append(a_format)
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} subtitles = {}

View File

@ -7,48 +7,53 @@ from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import compat_urlparse
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none,
remove_end,
unified_strdate,
ExtractorError, ExtractorError,
int_or_none,
parse_iso8601,
remove_end,
) )
class LifeNewsIE(InfoExtractor): class LifeNewsIE(InfoExtractor):
IE_NAME = 'lifenews' IE_NAME = 'life'
IE_DESC = 'LIFE | NEWS' IE_DESC = 'Life.ru'
_VALID_URL = r'https?://lifenews\.ru/(?:mobile/)?(?P<section>news|video)/(?P<id>\d+)' _VALID_URL = r'https?://life\.ru/t/[^/]+/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
# single video embedded via video/source # single video embedded via video/source
'url': 'http://lifenews.ru/news/98736', 'url': 'https://life.ru/t/новости/98736',
'md5': '77c95eaefaca216e32a76a343ad89d23', 'md5': '77c95eaefaca216e32a76a343ad89d23',
'info_dict': { 'info_dict': {
'id': '98736', 'id': '98736',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Мужчина нашел дома архив оборонного завода', 'title': 'Мужчина нашел дома архив оборонного завода',
'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26', 'description': 'md5:3b06b1b39b5e2bea548e403d99b8bf26',
'timestamp': 1344154740,
'upload_date': '20120805', 'upload_date': '20120805',
'view_count': int,
} }
}, { }, {
# single video embedded via iframe # single video embedded via iframe
'url': 'http://lifenews.ru/news/152125', 'url': 'https://life.ru/t/новости/152125',
'md5': '77d19a6f0886cd76bdbf44b4d971a273', 'md5': '77d19a6f0886cd76bdbf44b4d971a273',
'info_dict': { 'info_dict': {
'id': '152125', 'id': '152125',
'ext': 'mp4', 'ext': 'mp4',
'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ', 'title': 'В Сети появилось видео захвата «Правым сектором» колхозных полей ',
'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ', 'description': 'Жители двух поселков Днепропетровской области не простили радикалам угрозу лишения плодородных земель и пошли в лобовую. ',
'timestamp': 1427961840,
'upload_date': '20150402', 'upload_date': '20150402',
'view_count': int,
} }
}, { }, {
# two videos embedded via iframe # two videos embedded via iframe
'url': 'http://lifenews.ru/news/153461', 'url': 'https://life.ru/t/новости/153461',
'info_dict': { 'info_dict': {
'id': '153461', 'id': '153461',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'upload_date': '20150505', 'timestamp': 1430825520,
'view_count': int,
}, },
'playlist': [{ 'playlist': [{
'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795', 'md5': '9b6ef8bc0ffa25aebc8bdb40d89ab795',
@ -57,6 +62,7 @@ class LifeNewsIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 1)',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'timestamp': 1430825520,
'upload_date': '20150505', 'upload_date': '20150505',
}, },
}, { }, {
@ -66,22 +72,25 @@ class LifeNewsIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)', 'title': 'В Москве спасли потерявшегося медвежонка, который спрятался на дереве (Видео 2)',
'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.', 'description': 'Маленький хищник не смог найти дорогу домой и обрел временное убежище на тополе недалеко от жилого массива, пока его не нашла соседская собака.',
'timestamp': 1430825520,
'upload_date': '20150505', 'upload_date': '20150505',
}, },
}], }],
}, { }, {
'url': 'http://lifenews.ru/video/13035', 'url': 'https://life.ru/t/новости/213035',
'only_matching': True,
}, {
'url': 'https://life.ru/t/%D0%BD%D0%BE%D0%B2%D0%BE%D1%81%D1%82%D0%B8/153461',
'only_matching': True,
}, {
'url': 'https://life.ru/t/новости/411489/manuel_vals_nazval_frantsiiu_tsieliu_nomier_odin_dlia_ighil',
'only_matching': True, 'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
section = mobj.group('section')
webpage = self._download_webpage( webpage = self._download_webpage(url, video_id)
'http://lifenews.ru/%s/%s' % (section, video_id),
video_id, 'Downloading page')
video_urls = re.findall( video_urls = re.findall(
r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage) r'<video[^>]+><source[^>]+src=["\'](.+?)["\']', webpage)
@ -95,26 +104,22 @@ class LifeNewsIE(InfoExtractor):
title = remove_end( title = remove_end(
self._og_search_title(webpage), self._og_search_title(webpage),
' - Первый по срочным новостям — LIFE | NEWS') ' - Life.ru')
description = self._og_search_description(webpage) description = self._og_search_description(webpage)
view_count = self._html_search_regex( view_count = self._html_search_regex(
r'<div class=\'views\'>\s*(\d+)\s*</div>', webpage, 'view count', fatal=False) r'<div[^>]+class=(["\']).*?\bhits-count\b.*?\1[^>]*>\s*(?P<value>\d+)\s*</div>',
comment_count = self._html_search_regex( webpage, 'view count', fatal=False, group='value')
r'=\'commentCount\'[^>]*>\s*(\d+)\s*<',
webpage, 'comment count', fatal=False)
upload_date = self._html_search_regex( timestamp = parse_iso8601(self._search_regex(
r'<time[^>]*datetime=\'([^\']+)\'', webpage, 'upload date', fatal=False) r'<time[^>]+datetime=(["\'])(?P<value>.+?)\1',
if upload_date is not None: webpage, 'upload date', fatal=False, group='value'))
upload_date = unified_strdate(upload_date)
common_info = { common_info = {
'description': description, 'description': description,
'view_count': int_or_none(view_count), 'view_count': int_or_none(view_count),
'comment_count': int_or_none(comment_count), 'timestamp': timestamp,
'upload_date': upload_date,
} }
def make_entry(video_id, video_url, index=None): def make_entry(video_id, video_url, index=None):
@ -183,7 +188,8 @@ class LifeEmbedIE(InfoExtractor):
ext = determine_ext(video_url) ext = determine_ext(video_url)
if ext == 'm3u8': if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', m3u8_id='m3u8')) video_url, video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='m3u8'))
else: else:
formats.append({ formats.append({
'url': video_url, 'url': video_url,

View File

@ -150,7 +150,7 @@ class LivestreamIE(InfoExtractor):
} }
def _extract_stream_info(self, stream_info): def _extract_stream_info(self, stream_info):
broadcast_id = stream_info['broadcast_id'] broadcast_id = compat_str(stream_info['broadcast_id'])
is_live = stream_info.get('is_live') is_live = stream_info.get('is_live')
formats = [] formats = []

View File

@ -0,0 +1,47 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class LocalNews8IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?localnews8\.com/(?:[^/]+/)*(?P<display_id>[^/]+)/(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.localnews8.com/news/rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings/35183304',
'md5': 'be4d48aea61aa2bde7be2ee47691ad20',
'info_dict': {
'id': '35183304',
'display_id': 'rexburg-business-turns-carbon-fiber-scraps-into-wedding-rings',
'ext': 'mp4',
'title': 'Rexburg business turns carbon fiber scraps into wedding ring',
'description': 'The process was first invented by Lamborghini and less than a dozen companies around the world use it.',
'duration': 153,
'timestamp': 1441844822,
'upload_date': '20150910',
'uploader_id': 'api',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
partner_id = self._search_regex(
r'partnerId\s*[:=]\s*(["\'])(?P<id>\d+)\1',
webpage, 'partner id', group='id')
kaltura_id = self._search_regex(
r'videoIdString\s*[:=]\s*(["\'])kaltura:(?P<id>[0-9a-z_]+)\1',
webpage, 'videl id', group='id')
return {
'_type': 'url_transparent',
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
'ie_key': 'Kaltura',
'id': video_id,
'display_id': display_id,
}

View File

@ -55,7 +55,9 @@ class NRKBaseIE(InfoExtractor):
for subtitle in ('webVtt', 'timedText'): for subtitle in ('webVtt', 'timedText'):
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle) subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
if subtitle_url: if subtitle_url:
subtitles.setdefault('no', []).append({'url': subtitle_url}) subtitles.setdefault('no', []).append({
'url': compat_urllib_parse_unquote(subtitle_url)
})
entries.append({ entries.append({
'id': asset.get('carrierId') or entry_id, 'id': asset.get('carrierId') or entry_id,
'title': entry_title, 'title': entry_title,

View File

@ -8,6 +8,7 @@ from ..utils import (
float_or_none, float_or_none,
ExtractorError, ExtractorError,
unsmuggle_url, unsmuggle_url,
determine_ext,
) )
from ..compat import compat_urllib_parse_urlencode from ..compat import compat_urllib_parse_urlencode
@ -15,71 +16,80 @@ from ..compat import compat_urllib_parse_urlencode
class OoyalaBaseIE(InfoExtractor): class OoyalaBaseIE(InfoExtractor):
_PLAYER_BASE = 'http://player.ooyala.com/' _PLAYER_BASE = 'http://player.ooyala.com/'
_CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
_AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v1/authorization/embed_code/%s/%s?' _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
def _extract(self, content_tree_url, video_id, domain='example.org'): def _extract(self, content_tree_url, video_id, domain='example.org'):
content_tree = self._download_json(content_tree_url, video_id)['content_tree'] content_tree = self._download_json(content_tree_url, video_id)['content_tree']
metadata = content_tree[list(content_tree)[0]] metadata = content_tree[list(content_tree)[0]]
embed_code = metadata['embed_code'] embed_code = metadata['embed_code']
pcode = metadata.get('asset_pcode') or embed_code pcode = metadata.get('asset_pcode') or embed_code
video_info = { title = metadata['title']
'id': embed_code,
'title': metadata['title'], auth_data = self._download_json(
'description': metadata.get('description'), self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) +
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'), compat_urllib_parse_urlencode({
'duration': float_or_none(metadata.get('duration'), 1000), 'domain': domain,
} 'supportedFormats': 'mp4,rtmp,m3u8,hds',
}), video_id)
cur_auth_data = auth_data['authorization_data'][embed_code]
urls = [] urls = []
formats = [] formats = []
for supported_format in ('mp4', 'm3u8', 'hds', 'rtmp'): if cur_auth_data['authorized']:
auth_data = self._download_json( for stream in cur_auth_data['streams']:
self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + s_url = base64.b64decode(
compat_urllib_parse_urlencode({ stream['url']['data'].encode('ascii')).decode('utf-8')
'domain': domain, if s_url in urls:
'supportedFormats': supported_format continue
}), urls.append(s_url)
video_id, 'Downloading %s JSON' % supported_format) ext = determine_ext(s_url, None)
delivery_type = stream['delivery_type']
cur_auth_data = auth_data['authorization_data'][embed_code] if delivery_type == 'hls' or ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
if cur_auth_data['authorized']: s_url, embed_code, 'mp4', 'm3u8_native',
for stream in cur_auth_data['streams']: m3u8_id='hls', fatal=False))
url = base64.b64decode( elif delivery_type == 'hds' or ext == 'f4m':
stream['url']['data'].encode('ascii')).decode('utf-8') formats.extend(self._extract_f4m_formats(
if url in urls: s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
continue elif ext == 'smil':
urls.append(url) formats.extend(self._extract_smil_formats(
delivery_type = stream['delivery_type'] s_url, embed_code, fatal=False))
if delivery_type == 'hls' or '.m3u8' in url: else:
formats.extend(self._extract_m3u8_formats( formats.append({
url, embed_code, 'mp4', 'm3u8_native', 'url': s_url,
m3u8_id='hls', fatal=False)) 'ext': ext or stream.get('delivery_type'),
elif delivery_type == 'hds' or '.f4m' in url: 'vcodec': stream.get('video_codec'),
formats.extend(self._extract_f4m_formats( 'format_id': delivery_type,
url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False)) 'width': int_or_none(stream.get('width')),
elif '.smil' in url: 'height': int_or_none(stream.get('height')),
formats.extend(self._extract_smil_formats( 'abr': int_or_none(stream.get('audio_bitrate')),
url, embed_code, fatal=False)) 'vbr': int_or_none(stream.get('video_bitrate')),
else: 'fps': float_or_none(stream.get('framerate')),
formats.append({ })
'url': url, else:
'ext': stream.get('delivery_type'), raise ExtractorError('%s said: %s' % (
'vcodec': stream.get('video_codec'), self.IE_NAME, cur_auth_data['message']), expected=True)
'format_id': delivery_type,
'width': int_or_none(stream.get('width')),
'height': int_or_none(stream.get('height')),
'abr': int_or_none(stream.get('audio_bitrate')),
'vbr': int_or_none(stream.get('video_bitrate')),
'fps': float_or_none(stream.get('framerate')),
})
else:
raise ExtractorError('%s said: %s' % (
self.IE_NAME, cur_auth_data['message']), expected=True)
self._sort_formats(formats) self._sort_formats(formats)
video_info['formats'] = formats subtitles = {}
return video_info for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
sub_url = sub.get('url')
if not sub_url:
continue
subtitles[lang] = [{
'url': sub_url,
}]
return {
'id': embed_code,
'title': title,
'description': metadata.get('description'),
'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
'duration': float_or_none(metadata.get('duration'), 1000),
'subtitles': subtitles,
'formats': formats,
}
class OoyalaIE(OoyalaBaseIE): class OoyalaIE(OoyalaBaseIE):

View File

@ -2,7 +2,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import parse_iso8601 from ..utils import (
parse_iso8601,
unescapeHTML,
)
class PeriscopeIE(InfoExtractor): class PeriscopeIE(InfoExtractor):
@ -42,8 +45,11 @@ class PeriscopeIE(InfoExtractor):
broadcast = broadcast_data['broadcast'] broadcast = broadcast_data['broadcast']
status = broadcast['status'] status = broadcast['status']
uploader = broadcast.get('user_display_name') or broadcast_data.get('user', {}).get('display_name') user = broadcast_data.get('user', {})
uploader_id = broadcast.get('user_id') or broadcast_data.get('user', {}).get('id')
uploader = broadcast.get('user_display_name') or user.get('display_name')
uploader_id = (broadcast.get('username') or user.get('username') or
broadcast.get('user_id') or user.get('id'))
title = '%s - %s' % (uploader, status) if uploader else status title = '%s - %s' % (uploader, status) if uploader else status
state = broadcast.get('state').lower() state = broadcast.get('state').lower()
@ -92,6 +98,7 @@ class PeriscopeUserIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': 'LularoeHusbandMike', 'id': 'LularoeHusbandMike',
'title': 'LULAROE HUSBAND MIKE', 'title': 'LULAROE HUSBAND MIKE',
'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
}, },
# Periscope only shows videos in the last 24 hours, so it's possible to # Periscope only shows videos in the last 24 hours, so it's possible to
# get 0 videos # get 0 videos
@ -103,16 +110,19 @@ class PeriscopeUserIE(InfoExtractor):
webpage = self._download_webpage(url, user_id) webpage = self._download_webpage(url, user_id)
broadcast_data = self._parse_json(self._html_search_meta( data_store = self._parse_json(
'broadcast-data', webpage, default='{}'), user_id) unescapeHTML(self._search_regex(
username = broadcast_data.get('user', {}).get('display_name') r'data-store=(["\'])(?P<data>.+?)\1',
user_broadcasts = self._parse_json( webpage, 'data store', default='{}', group='data')),
self._html_search_meta('user-broadcasts', webpage, default='{}'),
user_id) user_id)
user = data_store.get('User', {}).get('user', {})
title = user.get('display_name') or user.get('username')
description = user.get('description')
entries = [ entries = [
self.url_result( self.url_result(
'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])) 'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id']))
for broadcast in user_broadcasts.get('broadcasts', [])] for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])]
return self.playlist_result(entries, user_id, username) return self.playlist_result(entries, user_id, title, description)

View File

@ -0,0 +1,130 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
xpath_text,
find_xpath_attr,
determine_ext,
int_or_none,
unified_strdate,
xpath_element,
ExtractorError,
)
class RadioCanadaIE(InfoExtractor):
IE_NAME = 'radiocanada'
_VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
_TEST = {
'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
'info_dict': {
'id': '7184272',
'ext': 'flv',
'title': 'Le parcours du tireur capté sur vidéo',
'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
'upload_date': '20141023',
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
app_code, video_id = re.match(self._VALID_URL, url).groups()
formats = []
# TODO: extract m3u8 and f4m formats
# m3u8 formats can be extracted using ipad device_type return 403 error code when ffmpeg try to download segements
# f4m formats can be extracted using flashhd device_type but they produce unplayable file
for device_type in ('flash',):
v_data = self._download_xml(
'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx',
video_id, note='Downloading %s XML' % device_type, query={
'appCode': app_code,
'idMedia': video_id,
'connectionType': 'broadband',
'multibitrate': 'true',
'deviceType': device_type,
# paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction
'paysJ391wsHjbOJwvCs26toz': 'CA',
'bypasslock': 'NZt5K62gRqfc',
})
v_url = xpath_text(v_data, 'url')
if not v_url:
continue
if v_url == 'null':
raise ExtractorError('%s said: %s' % (
self.IE_NAME, xpath_text(v_data, 'message')), expected=True)
ext = determine_ext(v_url)
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
v_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(v_url, video_id, f4m_id='hds', fatal=False))
else:
ext = determine_ext(v_url)
bitrates = xpath_element(v_data, 'bitrates')
for url_e in bitrates.findall('url'):
tbr = int_or_none(url_e.get('bitrate'))
if not tbr:
continue
formats.append({
'format_id': 'rtmp-%d' % tbr,
'url': re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url),
'ext': 'flv',
'protocol': 'rtmp',
'width': int_or_none(url_e.get('width')),
'height': int_or_none(url_e.get('height')),
'tbr': tbr,
})
self._sort_formats(formats)
metadata = self._download_xml(
'http://api.radio-canada.ca/metaMedia/v1/index.ashx',
video_id, note='Downloading metadata XML', query={
'appCode': app_code,
'idMedia': video_id,
})
def get_meta(name):
el = find_xpath_attr(metadata, './/Meta', 'name', name)
return el.text if el is not None else None
return {
'id': video_id,
'title': get_meta('Title'),
'description': get_meta('Description') or get_meta('ShortDescription'),
'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
'duration': int_or_none(get_meta('length')),
'series': get_meta('Emission'),
'season_number': int_or_none('SrcSaison'),
'episode_number': int_or_none('SrcEpisode'),
'upload_date': unified_strdate(get_meta('Date')),
'formats': formats,
}
class RadioCanadaAudioVideoIE(InfoExtractor):
'radiocanada:audiovideo'
_VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)'
_TEST = {
'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
'info_dict': {
'id': '7527184',
'ext': 'flv',
'title': 'Barack Obama au Vietnam',
'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
'upload_date': '20160523',
},
'params': {
# rtmp download
'skip_download': True,
},
}
def _real_extract(self, url):
return self.url_result('radiocanada:medianet:%s' % self._match_id(url))

View File

@ -0,0 +1,69 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
js_to_json,
int_or_none,
unescapeHTML,
)
class ReutersIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)'
_TEST = {
'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562',
'md5': '8015113643a0b12838f160b0b81cc2ee',
'info_dict': {
'id': '368575562',
'ext': 'mp4',
'title': 'San Francisco police chief resigns',
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(
'http://www.reuters.com/assets/iframe/yovideo?videoId=%s' % video_id, video_id)
video_data = js_to_json(self._search_regex(
r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);',
webpage, 'video data'))
def get_json_value(key, fatal=False):
return self._search_regex('"%s"\s*:\s*"([^"]+)"' % key, video_data, key, fatal=fatal)
title = unescapeHTML(get_json_value('title', fatal=True))
mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups()
mas_data = self._download_json(
'http://mas-e.cds1.yospace.com/mas/%s/%s?trans=json' % (mmid, fid),
video_id, transform_source=js_to_json)
formats = []
for f in mas_data:
f_url = f.get('url')
if not f_url:
continue
method = f.get('method')
if method == 'hls':
formats.extend(self._extract_m3u8_formats(
f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
else:
container = f.get('container')
ext = '3gp' if method == 'mobile' else container
formats.append({
'format_id': ext,
'url': f_url,
'ext': ext,
'container': container if method != 'mobile' else None,
})
self._sort_formats(formats)
return {
'id': video_id,
'title': title,
'thumbnail': get_json_value('thumb'),
'duration': int_or_none(get_json_value('seconds')),
'formats': formats,
}

View File

@ -64,7 +64,7 @@ def _decrypt_url(png):
class RTVEALaCartaIE(InfoExtractor): class RTVEALaCartaIE(InfoExtractor):
IE_NAME = 'rtve.es:alacarta' IE_NAME = 'rtve.es:alacarta'
IE_DESC = 'RTVE a la carta' IE_DESC = 'RTVE a la carta'
_VALID_URL = r'https?://www\.rtve\.es/(m/)?alacarta/videos/[^/]+/[^/]+/(?P<id>\d+)' _VALID_URL = r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
@ -87,6 +87,9 @@ class RTVEALaCartaIE(InfoExtractor):
}, { }, {
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve', 'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
'only_matching': True,
}] }]
def _real_initialize(self): def _real_initialize(self):

View File

@ -0,0 +1,86 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .jwplatform import JWPlatformBaseIE
from ..compat import compat_parse_qs
from ..utils import (
ExtractorError,
parse_duration,
)
class SendtoNewsIE(JWPlatformBaseIE):
_VALID_URL = r'https?://embed\.sendtonews\.com/player/embed\.php\?(?P<query>[^#]+)'
_TEST = {
# From http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/
'url': 'http://embed.sendtonews.com/player/embed.php?SK=GxfCe0Zo7D&MK=175909&PK=5588&autoplay=on&sound=yes',
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588',
'ext': 'mp4',
'title': 'Recap: CLE 15, CIN 6',
'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
'duration': 49,
},
'params': {
# m3u8 download
'skip_download': True,
},
}
_URL_TEMPLATE = '//embed.sendtonews.com/player/embed.php?SK=%s&MK=%s&PK=%s'
@classmethod
def _extract_url(cls, webpage):
mobj = re.search(r'''(?x)<script[^>]+src=([\'"])
(?:https?:)?//embed\.sendtonews\.com/player/responsiveembed\.php\?
.*\bSC=(?P<SC>[0-9a-zA-Z-]+).*
\1>''', webpage)
if mobj:
sk, mk, pk = mobj.group('SC').split('-')
return cls._URL_TEMPLATE % (sk, mk, pk)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
params = compat_parse_qs(mobj.group('query'))
if 'SK' not in params or 'MK' not in params or 'PK' not in params:
raise ExtractorError('Invalid URL', expected=True)
video_id = '-'.join([params['SK'][0], params['MK'][0], params['PK'][0]])
webpage = self._download_webpage(url, video_id)
jwplayer_data_str = self._search_regex(
r'jwplayer\("[^"]+"\)\.setup\((.+?)\);', webpage, 'JWPlayer data')
js_vars = {
'w': 1024,
'h': 768,
'modeVar': 'html5',
}
for name, val in js_vars.items():
js_val = '%d' % val if isinstance(val, int) else '"%s"' % val
jwplayer_data_str = jwplayer_data_str.replace(':%s,' % name, ':%s,' % js_val)
info_dict = self._parse_jwplayer_data(
self._parse_json(jwplayer_data_str, video_id),
video_id, require_title=False, rtmp_params={'no_resume': True})
title = self._html_search_regex(
r'<div[^>]+class="embedTitle">([^<]+)</div>', webpage, 'title')
description = self._html_search_regex(
r'<div[^>]+class="embedSubTitle">([^<]+)</div>', webpage,
'description', fatal=False)
duration = parse_duration(self._html_search_regex(
r'<div[^>]+class="embedDetails">([0-9:]+)', webpage,
'duration', fatal=False))
info_dict.update({
'title': title,
'description': description,
'duration': duration,
})
return info_dict

View File

@ -11,6 +11,7 @@ class TeachingChannelIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
'md5': '3d6361864d7cac20b57c8784da17166f',
'info_dict': { 'info_dict': {
'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
'ext': 'mp4', 'ext': 'mp4',
@ -19,9 +20,9 @@ class TeachingChannelIE(InfoExtractor):
'duration': 422.255, 'duration': 422.255,
}, },
'params': { 'params': {
# m3u8 download
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['Ooyala'],
} }
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -6,7 +6,7 @@ from .common import InfoExtractor
class TF1IE(InfoExtractor): class TF1IE(InfoExtractor):
"""TF1 uses the wat.tv player.""" """TF1 uses the wat.tv player."""
_VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|www\.tfou)\.fr/(?:[^/]+/)*(?P<id>.+?)\.html' _VALID_URL = r'https?://(?:(?:videos|www|lci)\.tf1|(?:www\.)?(?:tfou|ushuaiatv|histoire|tvbreizh))\.fr/(?:[^/]+/)*(?P<id>[^/?#.]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html', 'url': 'http://videos.tf1.fr/auto-moto/citroen-grand-c4-picasso-2013-presentation-officielle-8062060.html',
'info_dict': { 'info_dict': {
@ -48,6 +48,6 @@ class TF1IE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
wat_id = self._html_search_regex( wat_id = self._html_search_regex(
r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:#.*?)?\1', r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})(?:.*?)?\1',
webpage, 'wat id', group='id') webpage, 'wat id', group='id')
return self.url_result('wat:%s' % wat_id, 'Wat') return self.url_result('wat:%s' % wat_id, 'Wat')

View File

@ -151,6 +151,22 @@ class ThePlatformIE(ThePlatformBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
@classmethod
def _extract_urls(cls, webpage):
m = re.search(
r'''(?x)
<meta\s+
property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
''', webpage)
if m:
return [m.group('url')]
matches = re.findall(
r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
return list(zip(*matches))[1]
@staticmethod @staticmethod
def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
flags = '10' if include_qs else '00' flags = '10' if include_qs else '00'

View File

@ -37,6 +37,7 @@ class VeohIE(InfoExtractor):
'uploader': 'afp-news', 'uploader': 'afp-news',
'duration': 123, 'duration': 123,
}, },
'skip': 'This video has been deleted.',
}, },
{ {
'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX', 'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',

View File

@ -11,12 +11,14 @@ class ViceIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1',
'md5': 'e9d77741f9e42ba583e683cd170660f7',
'info_dict': { 'info_dict': {
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp',
'ext': 'flv', 'ext': 'flv',
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov',
'duration': 725.983, 'duration': 725.983,
}, },
'add_ie': ['Ooyala'],
}, { }, {
'url': 'http://www.vice.com/video/how-to-hack-a-car', 'url': 'http://www.vice.com/video/how-to-hack-a-car',
'md5': '6fb2989a3fed069fb8eab3401fc2d3c9', 'md5': '6fb2989a3fed069fb8eab3401fc2d3c9',
@ -29,6 +31,7 @@ class ViceIE(InfoExtractor):
'uploader': 'Motherboard', 'uploader': 'Motherboard',
'upload_date': '20140529', 'upload_date': '20140529',
}, },
'add_ie': ['Youtube'],
}, { }, {
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'only_matching': True, 'only_matching': True,

View File

@ -15,7 +15,8 @@ class VoxMediaIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Google\'s new material design direction', 'title': 'Google\'s new material design direction',
'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2', 'description': 'md5:2f44f74c4d14a1f800ea73e1c6832ad2',
} },
'add_ie': ['Ooyala'],
}, { }, {
# data-ooyala-id # data-ooyala-id
'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet', 'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
@ -25,7 +26,8 @@ class VoxMediaIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'The Nexus 6: hands-on with Google\'s phablet', 'title': 'The Nexus 6: hands-on with Google\'s phablet',
'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af', 'description': 'md5:87a51fe95ff8cea8b5bdb9ac7ae6a6af',
} },
'add_ie': ['Ooyala'],
}, { }, {
# volume embed # volume embed
'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill', 'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
@ -35,7 +37,8 @@ class VoxMediaIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'The new frontier of LGBTQ civil rights, explained', 'title': 'The new frontier of LGBTQ civil rights, explained',
'description': 'md5:0dc58e94a465cbe91d02950f770eb93f', 'description': 'md5:0dc58e94a465cbe91d02950f770eb93f',
} },
'add_ie': ['Ooyala'],
}, { }, {
# youtube embed # youtube embed
'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance', 'url': 'http://www.vox.com/2016/3/24/11291692/robot-dance',
@ -48,7 +51,8 @@ class VoxMediaIE(InfoExtractor):
'upload_date': '20160324', 'upload_date': '20160324',
'uploader_id': 'voxdotcom', 'uploader_id': 'voxdotcom',
'uploader': 'Vox', 'uploader': 'Vox',
} },
'add_ie': ['Youtube'],
}, { }, {
# SBN.VideoLinkset.entryGroup multiple ooyala embeds # SBN.VideoLinkset.entryGroup multiple ooyala embeds
'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
@ -117,7 +121,7 @@ class VoxMediaIE(InfoExtractor):
volume_webpage = self._download_webpage( volume_webpage = self._download_webpage(
'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid) 'http://volume.vox-cdn.com/embed/%s' % volume_uuid, volume_uuid)
video_data = self._parse_json(self._search_regex( video_data = self._parse_json(self._search_regex(
r'Volume\.createVideo\(({.+})\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid) r'Volume\.createVideo\(({.+})\s*,\s*{.*}\s*,\s*\[.*\]\s*,\s*{.*}\);', volume_webpage, 'video data'), volume_uuid)
for provider_video_type in ('ooyala', 'youtube'): for provider_video_type in ('ooyala', 'youtube'):
provider_video_id = video_data.get('%s_id' % provider_video_type) provider_video_id = video_data.get('%s_id' % provider_video_type)
if provider_video_id: if provider_video_id:

View File

@ -11,7 +11,96 @@ from ..utils import (
class WashingtonPostIE(InfoExtractor): class WashingtonPostIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])' IE_NAME = 'washingtonpost'
_VALID_URL = r'(?:washingtonpost:|https?://(?:www\.)?washingtonpost\.com/video/(?:[^/]+/)*)(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
_TEST = {
'url': 'https://www.washingtonpost.com/video/c/video/480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'md5': '6f537e1334b714eb15f9563bd4b9cdfa',
'info_dict': {
'id': '480ba4ee-1ec7-11e6-82c2-a7dcb313287d',
'ext': 'mp4',
'title': 'Egypt finds belongings, debris from plane crash',
'description': 'md5:a17ceee432f215a5371388c1f680bd86',
'upload_date': '20160520',
'uploader': 'Reuters',
'timestamp': 1463778452,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % video_id,
video_id, transform_source=strip_jsonp)[0]['contentConfig']
title = video_data['title']
urls = []
formats = []
for s in video_data.get('streams', []):
s_url = s.get('url')
if not s_url or s_url in urls:
continue
urls.append(s_url)
video_type = s.get('type')
if video_type == 'smil':
continue
elif video_type in ('ts', 'hls') and ('_master.m3u8' in s_url or '_mobile.m3u8' in s_url):
m3u8_formats = self._extract_m3u8_formats(
s_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
for m3u8_format in m3u8_formats:
width = m3u8_format.get('width')
if not width:
continue
vbr = self._search_regex(
r'%d_%d_(\d+)' % (width, m3u8_format['height']), m3u8_format['url'], 'vbr', default=None)
if vbr:
m3u8_format.update({
'vbr': int_or_none(vbr),
})
formats.extend(m3u8_formats)
else:
width = int_or_none(s.get('width'))
vbr = int_or_none(s.get('bitrate'))
has_width = width != 0
formats.append({
'format_id': (
'%s-%d-%d' % (video_type, width, vbr)
if width
else video_type),
'vbr': vbr if has_width else None,
'width': width,
'height': int_or_none(s.get('height')),
'acodec': s.get('audioCodec'),
'vcodec': s.get('videoCodec') if has_width else 'none',
'filesize': int_or_none(s.get('fileSize')),
'url': s_url,
'ext': 'mp4',
'protocol': 'm3u8_native' if video_type in ('ts', 'hls') else None,
})
source_media_url = video_data.get('sourceMediaURL')
if source_media_url:
formats.append({
'format_id': 'source_media',
'url': source_media_url,
})
self._sort_formats(
formats, ('width', 'height', 'vbr', 'filesize', 'tbr', 'format_id'))
return {
'id': video_id,
'title': title,
'description': video_data.get('blurb'),
'uploader': video_data.get('credits', {}).get('source'),
'formats': formats,
'duration': int_or_none(video_data.get('videoDuration'), 100),
'timestamp': int_or_none(
video_data.get('dateConfig', {}).get('dateFirstPublished'), 1000),
}
class WashingtonPostArticleIE(InfoExtractor):
IE_NAME = 'washingtonpost:article'
_VALID_URL = r'https?://(?:www\.)?washingtonpost\.com/(?:[^/]+/)*(?P<id>[^/?#]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/', 'url': 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/',
'info_dict': { 'info_dict': {
@ -63,6 +152,10 @@ class WashingtonPostIE(InfoExtractor):
}] }]
}] }]
@classmethod
def suitable(cls, url):
return False if WashingtonPostIE.suitable(url) else super(WashingtonPostArticleIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
page_id = self._match_id(url) page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id) webpage = self._download_webpage(url, page_id)
@ -74,54 +167,7 @@ class WashingtonPostIE(InfoExtractor):
<div\s+class="posttv-video-embed[^>]*?data-uuid=| <div\s+class="posttv-video-embed[^>]*?data-uuid=|
data-video-uuid= data-video-uuid=
)"([^"]+)"''', webpage) )"([^"]+)"''', webpage)
entries = [] entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
for i, uuid in enumerate(uuids, start=1):
vinfo_all = self._download_json(
'http://www.washingtonpost.com/posttv/c/videojson/%s?resType=jsonp' % uuid,
page_id,
transform_source=strip_jsonp,
note='Downloading information of video %d/%d' % (i, len(uuids))
)
vinfo = vinfo_all[0]['contentConfig']
uploader = vinfo.get('credits', {}).get('source')
timestamp = int_or_none(
vinfo.get('dateConfig', {}).get('dateFirstPublished'), 1000)
formats = [{
'format_id': (
'%s-%s-%s' % (s.get('type'), s.get('width'), s.get('bitrate'))
if s.get('width')
else s.get('type')),
'vbr': s.get('bitrate') if s.get('width') != 0 else None,
'width': s.get('width'),
'height': s.get('height'),
'acodec': s.get('audioCodec'),
'vcodec': s.get('videoCodec') if s.get('width') != 0 else 'none',
'filesize': s.get('fileSize'),
'url': s.get('url'),
'ext': 'mp4',
'preference': -100 if s.get('type') == 'smil' else None,
'protocol': {
'MP4': 'http',
'F4F': 'f4m',
}.get(s.get('type')),
} for s in vinfo.get('streams', [])]
source_media_url = vinfo.get('sourceMediaURL')
if source_media_url:
formats.append({
'format_id': 'source_media',
'url': source_media_url,
})
self._sort_formats(formats)
entries.append({
'id': uuid,
'title': vinfo['title'],
'description': vinfo.get('blurb'),
'uploader': uploader,
'formats': formats,
'duration': int_or_none(vinfo.get('videoDuration'), 100),
'timestamp': timestamp,
})
return { return {
'_type': 'playlist', '_type': 'playlist',

View File

@ -2,25 +2,26 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import hashlib
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
unified_strdate, unified_strdate,
HEADRequest,
float_or_none,
) )
class WatIE(InfoExtractor): class WatIE(InfoExtractor):
_VALID_URL = r'(?:wat:(?P<real_id>\d{8})|https?://www\.wat\.tv/video/(?P<display_id>.*)-(?P<short_id>.*?)_.*?\.html)' _VALID_URL = r'(?:wat:|https?://(?:www\.)?wat\.tv/video/.*-)(?P<id>[0-9a-z]+)'
IE_NAME = 'wat.tv' IE_NAME = 'wat.tv'
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html', 'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html',
'md5': 'ce70e9223945ed26a8056d413ca55dc9', 'md5': '83d882d9de5c9d97f0bb2c6273cde56a',
'info_dict': { 'info_dict': {
'id': '11713067', 'id': '11713067',
'display_id': 'soupe-figues-l-orange-aux-epices',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Soupe de figues à l\'orange et aux épices', 'title': 'Soupe de figues à l\'orange et aux épices',
'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.', 'description': 'Retrouvez l\'émission "Petits plats en équilibre", diffusée le 18 août 2014.',
@ -33,7 +34,6 @@ class WatIE(InfoExtractor):
'md5': 'fbc84e4378165278e743956d9c1bf16b', 'md5': 'fbc84e4378165278e743956d9c1bf16b',
'info_dict': { 'info_dict': {
'id': '11713075', 'id': '11713075',
'display_id': 'gregory-lemarchal-voix-ange',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)', 'title': 'Grégory Lemarchal, une voix d\'ange depuis 10 ans (1/3)',
'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3', 'description': 'md5:b7a849cf16a2b733d9cd10c52906dee3',
@ -44,96 +44,85 @@ class WatIE(InfoExtractor):
}, },
] ]
def download_video_info(self, real_id): def _real_extract(self, url):
video_id = self._match_id(url)
video_id = video_id if video_id.isdigit() and len(video_id) > 6 else compat_str(int(video_id, 36))
# 'contentv4' is used in the website, but it also returns the related # 'contentv4' is used in the website, but it also returns the related
# videos, we don't need them # videos, we don't need them
info = self._download_json('http://www.wat.tv/interface/contentv3/' + real_id, real_id) video_info = self._download_json(
return info['media'] 'http://www.wat.tv/interface/contentv3/' + video_id, video_id)['media']
def _real_extract(self, url):
def real_id_for_chapter(chapter):
return chapter['tc_start'].split('-')[0]
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
real_id = mobj.group('real_id')
if not real_id:
short_id = mobj.group('short_id')
webpage = self._download_webpage(url, display_id or short_id)
real_id = self._search_regex(r'xtpage = ".*-(.*?)";', webpage, 'real id')
video_info = self.download_video_info(real_id)
error_desc = video_info.get('error_desc') error_desc = video_info.get('error_desc')
if error_desc: if error_desc:
raise ExtractorError( raise ExtractorError(
'%s returned error: %s' % (self.IE_NAME, error_desc), expected=True) '%s returned error: %s' % (self.IE_NAME, error_desc), expected=True)
geo_list = video_info.get('geoList')
country = geo_list[0] if geo_list else ''
chapters = video_info['chapters'] chapters = video_info['chapters']
first_chapter = chapters[0] first_chapter = chapters[0]
files = video_info['files']
first_file = files[0]
if real_id_for_chapter(first_chapter) != real_id: def video_id_for_chapter(chapter):
return chapter['tc_start'].split('-')[0]
if video_id_for_chapter(first_chapter) != video_id:
self.to_screen('Multipart video detected') self.to_screen('Multipart video detected')
chapter_urls = [] entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters]
for chapter in chapters: return self.playlist_result(entries, video_id, video_info['title'])
chapter_id = real_id_for_chapter(chapter)
# Yes, when we this chapter is processed by WatIE,
# it will download the info again
chapter_info = self.download_video_info(chapter_id)
chapter_urls.append(chapter_info['url'])
entries = [self.url_result(chapter_url) for chapter_url in chapter_urls]
return self.playlist_result(entries, real_id, video_info['title'])
upload_date = None
if 'date_diffusion' in first_chapter:
upload_date = unified_strdate(first_chapter['date_diffusion'])
# Otherwise we can continue and extract just one part, we have to use # Otherwise we can continue and extract just one part, we have to use
# the short id for getting the video url # the video id for getting the video url
formats = [{ date_diffusion = first_chapter.get('date_diffusion')
'url': 'http://wat.tv/get/android5/%s.mp4' % real_id, upload_date = unified_strdate(date_diffusion) if date_diffusion else None
'format_id': 'Mobile',
}]
fmts = [('SD', 'web')] def extract_url(path_template, url_type):
if first_file.get('hasHD'): req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id)
fmts.append(('HD', 'webhd')) head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type)
red_url = head.geturl()
if req_url == red_url:
raise ExtractorError(
'%s said: Sorry, this video is not available from your country.' % self.IE_NAME,
expected=True)
return red_url
def compute_token(param): m3u8_url = extract_url('ipad/%s.m3u8', 'm3u8')
timestamp = '%08x' % int(self._download_webpage( http_url = extract_url('android5/%s.mp4', 'http')
'http://www.wat.tv/servertime', real_id,
'Downloading server time').split('|')[0])
magic = '9b673b13fa4682ed14c3cfa5af5310274b514c4133e9b3a81e6e3aba009l2564'
return '%s/%s' % (hashlib.md5((magic + param + timestamp).encode('ascii')).hexdigest(), timestamp)
for fmt in fmts: formats = []
webid = '/%s/%s' % (fmt[1], real_id) m3u8_formats = self._extract_m3u8_formats(
video_url = self._download_webpage( m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
'http://www.wat.tv/get%s?token=%s&getURL=1&country=%s' % (webid, compute_token(webid), country), formats.extend(m3u8_formats)
real_id, formats.extend(self._extract_f4m_formats(
'Downloading %s video URL' % fmt[0], m3u8_url.replace('ios.', 'web.').replace('.m3u8', '.f4m'),
'Failed to download %s video URL' % fmt[0], video_id, f4m_id='hds', fatal=False))
False) for m3u8_format in m3u8_formats:
if not video_url: mobj = re.search(
r'audio.*?%3D(\d+)(?:-video.*?%3D(\d+))?', m3u8_format['url'])
if not mobj:
continue continue
formats.append({ abr, vbr = mobj.groups()
'url': video_url, abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
'ext': 'mp4', m3u8_format.update({
'format_id': fmt[0], 'vbr': vbr,
'abr': abr,
}) })
if not vbr or not abr:
continue
f = m3u8_format.copy()
f.update({
'url': re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url),
'format_id': f['format_id'].replace('hls', 'http'),
'protocol': 'http',
})
formats.append(f)
self._sort_formats(formats)
return { return {
'id': real_id, 'id': video_id,
'display_id': display_id,
'title': first_chapter['title'], 'title': first_chapter['title'],
'thumbnail': first_chapter['preview'], 'thumbnail': first_chapter['preview'],
'description': first_chapter['description'], 'description': first_chapter['description'],
'view_count': video_info['views'], 'view_count': video_info['views'],
'upload_date': upload_date, 'upload_date': upload_date,
'duration': first_file['duration'], 'duration': video_info['files'][0]['duration'],
'formats': formats, 'formats': formats,
} }

View File

@ -3,16 +3,17 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
sanitized_Request,
int_or_none, int_or_none,
float_or_none,
) )
class WistiaIE(InfoExtractor): class WistiaIE(InfoExtractor):
_VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
_API_URL = 'http://fast.wistia.com/embed/medias/{0:}.json' _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
_TEST = { _TESTS = [{
'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
'md5': 'cafeb56ec0c53c18c97405eecb3133df', 'md5': 'cafeb56ec0c53c18c97405eecb3133df',
'info_dict': { 'info_dict': {
@ -24,36 +25,54 @@ class WistiaIE(InfoExtractor):
'timestamp': 1386185018, 'timestamp': 1386185018,
'duration': 117, 'duration': 117,
}, },
} }, {
'url': 'wistia:sh7fpupwlt',
'only_matching': True,
}, {
# with hls video
'url': 'wistia:807fafadvk',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
request = sanitized_Request(self._API_URL.format(video_id)) data_json = self._download_json(
request.add_header('Referer', url) # Some videos require this. self._API_URL % video_id, video_id,
data_json = self._download_json(request, video_id) # Some videos require this.
headers={
'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id,
})
if data_json.get('error'): if data_json.get('error'):
raise ExtractorError('Error while getting the playlist', raise ExtractorError(
expected=True) 'Error while getting the playlist', expected=True)
data = data_json['media'] data = data_json['media']
title = data['name'] title = data['name']
formats = [] formats = []
thumbnails = [] thumbnails = []
for a in data['assets']: for a in data['assets']:
aurl = a.get('url')
if not aurl:
continue
astatus = a.get('status') astatus = a.get('status')
atype = a.get('type') atype = a.get('type')
if (astatus is not None and astatus != 2) or atype == 'preview': if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
continue continue
elif atype in ('still', 'still_image'): elif atype in ('still', 'still_image'):
thumbnails.append({ thumbnails.append({
'url': a['url'], 'url': aurl,
'resolution': '%dx%d' % (a['width'], a['height']), 'width': int_or_none(a.get('width')),
'height': int_or_none(a.get('height')),
}) })
else: else:
aext = a.get('ext')
is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8'
formats.append({ formats.append({
'format_id': atype, 'format_id': atype,
'url': a['url'], 'url': aurl,
'tbr': int_or_none(a.get('bitrate')), 'tbr': int_or_none(a.get('bitrate')),
'vbr': int_or_none(a.get('opt_vbitrate')), 'vbr': int_or_none(a.get('opt_vbitrate')),
'width': int_or_none(a.get('width')), 'width': int_or_none(a.get('width')),
@ -61,7 +80,8 @@ class WistiaIE(InfoExtractor):
'filesize': int_or_none(a.get('size')), 'filesize': int_or_none(a.get('size')),
'vcodec': a.get('codec'), 'vcodec': a.get('codec'),
'container': a.get('container'), 'container': a.get('container'),
'ext': a.get('ext'), 'ext': 'mp4' if is_m3u8 else aext,
'protocol': 'm3u8' if is_m3u8 else None,
'preference': 1 if atype == 'original' else None, 'preference': 1 if atype == 'original' else None,
}) })
@ -73,6 +93,6 @@ class WistiaIE(InfoExtractor):
'description': data.get('seoDescription'), 'description': data.get('seoDescription'),
'formats': formats, 'formats': formats,
'thumbnails': thumbnails, 'thumbnails': thumbnails,
'duration': int_or_none(data.get('duration')), 'duration': float_or_none(data.get('duration')),
'timestamp': int_or_none(data.get('createdAt')), 'timestamp': int_or_none(data.get('createdAt')),
} }

View File

@ -12,37 +12,52 @@ from ..utils import (
class XHamsterIE(InfoExtractor): class XHamsterIE(InfoExtractor):
_VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.+?)\.html(?:\?.*)?' _VALID_URL = r'(?P<proto>https?)://(?:.+?\.)?xhamster\.com/movies/(?P<id>[0-9]+)/(?P<seo>.*?)\.html(?:\?.*)?'
_TESTS = [ _TESTS = [{
{ 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html', 'md5': '8281348b8d3c53d39fffb377d24eac4e',
'info_dict': { 'info_dict': {
'id': '1509445', 'id': '1509445',
'ext': 'mp4', 'ext': 'mp4',
'title': 'FemaleAgent Shy beauty takes the bait', 'title': 'FemaleAgent Shy beauty takes the bait',
'upload_date': '20121014', 'upload_date': '20121014',
'uploader': 'Ruseful2011', 'uploader': 'Ruseful2011',
'duration': 893.52, 'duration': 893.52,
'age_limit': 18, 'age_limit': 18,
}
}, },
{ }, {
'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd', 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
'info_dict': { 'info_dict': {
'id': '2221348', 'id': '2221348',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Britney Spears Sexy Booty', 'title': 'Britney Spears Sexy Booty',
'upload_date': '20130914', 'upload_date': '20130914',
'uploader': 'jojo747400', 'uploader': 'jojo747400',
'duration': 200.48, 'duration': 200.48,
'age_limit': 18, 'age_limit': 18,
}
}, },
{ 'params': {
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html', 'skip_download': True,
'only_matching': True,
}, },
] }, {
# empty seo
'url': 'http://xhamster.com/movies/5667973/.html',
'info_dict': {
'id': '5667973',
'ext': 'mp4',
'title': '....',
'upload_date': '20160208',
'uploader': 'parejafree',
'duration': 72.0,
'age_limit': 18,
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
def extract_video_url(webpage, name): def extract_video_url(webpage, name):
@ -170,7 +185,7 @@ class XHamsterEmbedIE(InfoExtractor):
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_url = self._search_regex( video_url = self._search_regex(
r'href="(https?://xhamster\.com/movies/%s/[^"]+\.html[^"]*)"' % video_id, r'href="(https?://xhamster\.com/movies/%s/[^"]*\.html[^"]*)"' % video_id,
webpage, 'xhamster url', default=None) webpage, 'xhamster url', default=None)
if not video_url: if not video_url:

View File

@ -83,11 +83,8 @@ def update_self(to_screen, verbose, opener):
print_notes(to_screen, versions_info['versions']) print_notes(to_screen, versions_info['versions'])
filename = sys.argv[0] # sys.executable is set to the full pathname of the exe-file for py2exe
# Py2EXE: Filename could be different filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
if hasattr(sys, 'frozen') and not os.path.isfile(filename):
if os.path.isfile(filename + '.exe'):
filename += '.exe'
if not os.access(filename, os.W_OK): if not os.access(filename, os.W_OK):
to_screen('ERROR: no write permissions on %s' % filename) to_screen('ERROR: no write permissions on %s' % filename)
@ -95,7 +92,7 @@ def update_self(to_screen, verbose, opener):
# Py2EXE # Py2EXE
if hasattr(sys, 'frozen'): if hasattr(sys, 'frozen'):
exe = os.path.abspath(filename) exe = filename
directory = os.path.dirname(exe) directory = os.path.dirname(exe)
if not os.access(directory, os.W_OK): if not os.access(directory, os.W_OK):
to_screen('ERROR: no write permissions on %s' % directory) to_screen('ERROR: no write permissions on %s' % directory)

View File

@ -1055,7 +1055,10 @@ def unified_strdate(date_str, day_first=True):
if upload_date is None: if upload_date is None:
timetuple = email.utils.parsedate_tz(date_str) timetuple = email.utils.parsedate_tz(date_str)
if timetuple: if timetuple:
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') try:
upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
except ValueError:
pass
if upload_date is not None: if upload_date is not None:
return compat_str(upload_date) return compat_str(upload_date)

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2016.05.16' __version__ = '2016.05.21.2'