Merge branch 'master' into use-other-downloaders

Rogério Brito 2013-02-05 23:40:29 -02:00
commit 2bc1188cc8
13 changed files with 229 additions and 200 deletions

.gitignore
@@ -17,3 +17,4 @@ youtube-dl.tar.gz
 .coverage
 cover/
 updates_key.pem
+*.egg-info

Makefile
@@ -7,6 +7,7 @@ PREFIX=/usr/local
 BINDIR=$(PREFIX)/bin
 MANDIR=$(PREFIX)/man
 SYSCONFDIR=/etc
+PYTHON=/usr/bin/env python

 install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
         install -d $(DESTDIR)$(BINDIR)
@@ -27,7 +28,7 @@ tar: youtube-dl.tar.gz
 youtube-dl: youtube_dl/*.py
         zip --quiet youtube-dl youtube_dl/*.py
         zip --quiet --junk-paths youtube-dl youtube_dl/__main__.py
-        echo '#!/usr/bin/env python' > youtube-dl
+        echo '#!$(PYTHON)' > youtube-dl
         cat youtube-dl.zip >> youtube-dl
         rm youtube-dl.zip
        chmod a+x youtube-dl
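With PYTHON as a make variable, the shebang baked into the zip-based youtube-dl executable is no longer hard-coded and can be overridden per invocation (standard make semantics), e.g. make youtube-dl PYTHON=/usr/bin/python3 (an illustrative invocation, not part of this diff).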

README.md
@@ -38,6 +38,10 @@ which means you can modify it, redistribute it or use it however you like.
     --reject-title REGEX       skip download for matching titles (regex or
                                caseless sub-string)
     --max-downloads NUMBER     Abort after downloading NUMBER files
+    --min-filesize SIZE        Do not download any videos smaller than SIZE (e.g.
+                               50k or 44.6m)
+    --max-filesize SIZE        Do not download any videos larger than SIZE (e.g.
+                               50k or 44.6m)

 ## Filesystem Options:
     -t, --title                use title in file name

devscripts/release.sh
@@ -83,4 +83,7 @@ ROOT=$(pwd)
 )
 rm -rf build

+echo "Uploading to PyPi ..."
+python setup.py sdist upload
+
 echo "\n### DONE!"

setup.py
@@ -2,10 +2,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import print_function

-from distutils.core import setup
 import pkg_resources
 import sys

+try:
+    from setuptools import setup
+except ImportError:
+    from distutils.core import setup
+
 try:
     import py2exe
     """This will create an exe that needs Microsoft Visual C++ 2008 Redistributable Package"""

test/test_download.py
@@ -98,7 +98,7 @@ def generator(test_case):
             for tc in test_cases:
                 if not test_case.get('params', {}).get('skip_download', False):
-                    self.assertTrue(os.path.exists(tc['file']))
+                    self.assertTrue(os.path.exists(tc['file']), msg='Missing file ' + tc['file'])
                     self.assertTrue(tc['file'] in finished_hook_called)
                 self.assertTrue(os.path.exists(tc['file'] + '.info.json'))
                 if 'md5' in tc:
@@ -107,11 +107,7 @@ def generator(test_case):
                 with io.open(tc['file'] + '.info.json', encoding='utf-8') as infof:
                     info_dict = json.load(infof)
                 for (info_field, value) in tc.get('info_dict', {}).items():
-                    if value.startswith('md5:'):
-                        md5_info_value = hashlib.md5(info_dict.get(info_field, '')).hexdigest()
-                        self.assertEqual(value[3:], md5_info_value)
-                    else:
-                        self.assertEqual(value, info_dict.get(info_field))
+                    self.assertEqual(value, info_dict.get(info_field))
         finally:
             for tc in test_cases:
                 _try_rm(tc['file'])

test/tests.json
@@ -76,7 +76,8 @@
     "name": "StanfordOpenClassroom",
     "md5": "544a9468546059d4e80d76265b0443b8",
     "url": "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100",
-    "file": "PracticalUnix_intro-environment.mp4"
+    "file": "PracticalUnix_intro-environment.mp4",
+    "skip": "Currently offline"
   },
   {
     "name": "XNXX",
@@ -181,37 +182,12 @@
   },
   {
     "name": "ComedyCentral",
-    "url": "http://www.thedailyshow.com/full-episodes/thu-december-13-2012-kristen-stewart",
-    "playlist": [
-      {
-        "file": "422204.mp4",
-        "md5": "7a7abe068b31ff03e7b8a37596e72380",
-        "info_dict": {
-          "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 1"
-        }
-      },
-      {
-        "file": "422205.mp4",
-        "md5": "30552b7274c94dbb933f64600eadddd2",
-        "info_dict": {
-          "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 2"
-        }
-      },
-      {
-        "file": "422206.mp4",
-        "md5": "1f4c0664b352cb8e8fe85d5da4fbee91",
-        "info_dict": {
-          "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 3"
-        }
-      },
-      {
-        "file": "422207.mp4",
-        "md5": "f61ee8a4e6bd1308438e03badad78554",
-        "info_dict": {
-          "title": "thedailyshow-thu-december-13-2012-kristen-stewart part 4"
-        }
-      }
-    ]
+    "url": "http://www.thedailyshow.com/watch/thu-december-13-2012/kristen-stewart",
+    "file": "422212.mp4",
+    "md5": "4e2f5cb088a83cd8cdb7756132f9739d",
+    "info_dict": {
+      "title": "thedailyshow-kristen-stewart part 1"
+    }
   },
   {
     "name": "RBMARadio",
@@ -225,5 +201,80 @@
       "uploader_id": "ford-lopatin",
       "location": "Spain"
     }
+  },
+  {
+    "name": "Facebook",
+    "url": "https://www.facebook.com/photo.php?v=120708114770723",
+    "file": "120708114770723.mp4",
+    "md5": "48975a41ccc4b7a581abd68651c1a5a8",
+    "info_dict": {
+      "title": "PEOPLE ARE AWESOME 2013",
+      "duration": 279
+    }
+  },
+  {
+    "name": "EightTracks",
+    "url": "http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
+    "playlist": [
+      {
+        "file": "11885610.m4a",
+        "md5": "96ce57f24389fc8734ce47f4c1abcc55",
+        "info_dict": {
+          "title": "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
+        }
+      },
+      {
+        "file": "11885608.m4a",
+        "md5": "4ab26f05c1f7291ea460a3920be8021f",
+        "info_dict": {
+          "title": "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
+          "uploader_id": "ytdl"
+        }
+      },
+      {
+        "file": "11885679.m4a",
+        "md5": "d30b5b5f74217410f4689605c35d1fd7",
+        "info_dict": {
+          "title": "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885680.m4a",
+        "md5": "4eb0a669317cd725f6bbd336a29f923a",
+        "info_dict": {
+          "title": "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885682.m4a",
+        "md5": "1893e872e263a2705558d1d319ad19e8",
+        "info_dict": {
+          "title": "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885683.m4a",
+        "md5": "b673c46f47a216ab1741ae8836af5899",
+        "info_dict": {
+          "title": "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885684.m4a",
+        "md5": "1d74534e95df54986da7f5abf7d842b7",
+        "info_dict": {
+          "title": "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad"
+        }
+      },
+      {
+        "file": "11885685.m4a",
+        "md5": "f081f47af8f6ae782ed131d38b9cd1c0",
+        "info_dict": {
+          "title": "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad"
+        }
+      }
+    ]
   }
 ]

youtube_dl/FileDownloader.py
@@ -82,6 +82,8 @@ class FileDownloader(object):
     subtitleslang:     Language of the subtitles to download
     test:              Download only first bytes to test the downloader.
     keepvideo:         Keep the video file after post-processing
+    min_filesize:      Skip files smaller than this size
+    max_filesize:      Skip files larger than this size
     """

     params = None
@@ -767,6 +769,15 @@ class FileDownloader(object):
             data_len = data.info().get('Content-length', None)
             if data_len is not None:
                 data_len = int(data_len) + resume_len
+                min_data_len = self.params.get("min_filesize", None)
+                max_data_len = self.params.get("max_filesize", None)
+                if min_data_len is not None and data_len < min_data_len:
+                    self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+                    return False
+                if max_data_len is not None and data_len > max_data_len:
+                    self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+                    return False
+
             data_len_str = self.format_bytes(data_len)
             byte_counter = 0 + resume_len
             block_size = self.params.get('buffersize', 1024)
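The gate only fires when the server reports a Content-Length, and already-downloaded (resumed) bytes count toward the total. A standalone sketch of the same check (check_filesize and its signature are illustrative, not youtube-dl API):

    def check_filesize(content_length, resume_len, params):
        """Return False when the reported size falls outside [min_filesize, max_filesize]."""
        if content_length is None:
            return True  # no Content-Length header, nothing to compare against
        data_len = int(content_length) + resume_len
        min_len = params.get('min_filesize')
        max_len = params.get('max_filesize')
        if min_len is not None and data_len < min_len:
            return False
        if max_len is not None and data_len > max_len:
            return False
        return True

    assert check_filesize('1024', 0, {'min_filesize': 2048}) is False
    assert check_filesize('1024', 2048, {'min_filesize': 2048}) is True  # resumed bytes count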

youtube_dl/InfoExtractors.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import

 import base64
 import datetime
+import itertools
 import netrc
 import os
 import re
@@ -263,13 +264,18 @@ class YoutubeIE(InfoExtractor):
             srt_lang = list(srt_lang_list.keys())[0]
         if not srt_lang in srt_lang_list:
             return (u'WARNING: no closed captions found in the specified language', None)
-        request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
+        params = compat_urllib_parse.urlencode({
+            'lang': srt_lang,
+            'name': srt_lang_list[srt_lang].encode('utf-8'),
+            'v': video_id,
+        })
+        url = 'http://www.youtube.com/api/timedtext?' + params
         try:
-            srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
         if not srt_xml:
-            return (u'WARNING: unable to download video subtitles', None)
+            return (u'WARNING: Did not fetch video subtitles', None)
         return (None, self._closed_captions_xml_to_srt(srt_xml))

     def _print_formats(self, formats):
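The move to urlencode matters because caption track names can contain spaces and non-ASCII characters that must be percent-escaped in the query string. A quick illustration with the Python 3 stdlib, to which youtube-dl's compat_urllib_parse resolves there (the .encode('utf-8') in the diff is for Python 2; the values here are illustrative):

    from urllib.parse import urlencode

    params = urlencode({'lang': 'de', 'name': 'Deutsch (automatisch erzeugt)', 'v': 'abc123XYZ'})
    print(params)  # lang=de&name=Deutsch+%28automatisch+erzeugt%29&v=abc123XYZ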
@@ -972,7 +978,7 @@ class VimeoIE(InfoExtractor):
     """Information extractor for vimeo.com."""

     # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
     IE_NAME = u'vimeo'

     def __init__(self, downloader=None):
@@ -993,7 +999,11 @@ class VimeoIE(InfoExtractor):
             self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
             return

-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
+        if not mobj.group('proto'):
+            url = 'https://' + url
+        if mobj.group('direct_link'):
+            url = 'https://vimeo.com/' + video_id

         # Retrieve video webpage to extract further information
         request = compat_urllib_request.Request(url, None, std_headers)
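Besides escaping the previously bare dot after (www|player), the rewritten pattern names its groups so the extractor can recognize protocol-less and play_redirect_hls URLs and normalize them. A quick check against representative URLs (the clip id is illustrative):

    import re

    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'

    for url in ('vimeo.com/12345678',
                'https://player.vimeo.com/video/12345678',
                'https://vimeo.com/play_redirect_hls?clip_id=12345678'):
        m = re.match(_VALID_URL, url)
        print(m.group('id'), m.group('proto'), m.group('direct_link'))
    # 12345678 None None
    # 12345678 https:// None
    # 12345678 https:// play_redirect_hls?clip_id=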
@@ -1980,62 +1990,14 @@ class DepositFilesIE(InfoExtractor):

 class FacebookIE(InfoExtractor):
     """Information Extractor for Facebook"""

+    _WORKING = False
+
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
     _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
     _NETRC_MACHINE = 'facebook'
-    _available_formats = ['video', 'highqual', 'lowqual']
-    _video_extensions = {
-        'video': 'mp4',
-        'highqual': 'mp4',
-        'lowqual': 'mp4',
-    }
     IE_NAME = u'facebook'

-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def _reporter(self, message):
-        """Add header and report message."""
-        self._downloader.to_screen(u'[facebook] %s' % message)
-
     def report_login(self):
         """Report attempt to log in."""
-        self._reporter(u'Logging in')
-
-    def report_video_webpage_download(self, video_id):
-        """Report attempt to download video webpage."""
-        self._reporter(u'%s: Downloading video webpage' % video_id)
-
-    def report_information_extraction(self, video_id):
-        """Report attempt to extract video information."""
-        self._reporter(u'%s: Extracting video information' % video_id)
-
-    def _parse_page(self, video_webpage):
-        """Extract video information from page"""
-        # General data
-        data = {'title': r'\("video_title", "(.*?)"\)',
-            'description': r'<div class="datawrap">(.*?)</div>',
-            'owner': r'\("video_owner_name", "(.*?)"\)',
-            'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
-            }
-        video_info = {}
-        for piece in data.keys():
-            mobj = re.search(data[piece], video_webpage)
-            if mobj is not None:
-                video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
-
-        # Video urls
-        video_urls = {}
-        for fmt in self._available_formats:
-            mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
-            if mobj is not None:
-                # URL is in a Javascript segment inside an escaped Unicode format within
-                # the generally utf-8 page
-                video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
-        video_info['video_urls'] = video_urls
-
-        return video_info
+        self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)

     def _real_initialize(self):
         if self._downloader is None:
@@ -2088,100 +2050,35 @@ class FacebookIE(InfoExtractor):
             return
         video_id = mobj.group('ID')

-        # Get video webpage
-        self.report_video_webpage_download(video_id)
-        request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
-        try:
-            page = compat_urllib_request.urlopen(request)
-            video_webpage = page.read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
-            return
-
-        # Start extracting information
-        self.report_information_extraction(video_id)
-
-        # Extract information
-        video_info = self._parse_page(video_webpage)
-
-        # uploader
-        if 'owner' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
-            return
-        video_uploader = video_info['owner']
-
-        # title
-        if 'title' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract video title')
-            return
-        video_title = video_info['title']
-        video_title = video_title.decode('utf-8')
-
-        # thumbnail image
-        if 'thumbnail' not in video_info:
-            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
-            video_thumbnail = ''
-        else:
-            video_thumbnail = video_info['thumbnail']
-
-        # upload date
-        upload_date = None
-        if 'upload_date' in video_info:
-            upload_time = video_info['upload_date']
-            timetuple = email.utils.parsedate_tz(upload_time)
-            if timetuple is not None:
-                try:
-                    upload_date = time.strftime('%Y%m%d', timetuple[0:9])
-                except:
-                    pass
-
-        # description
-        video_description = video_info.get('description', 'No description available.')
-
-        url_map = video_info['video_urls']
-        if url_map:
-            # Decide which formats to download
-            req_format = self._downloader.params.get('format', None)
-            format_limit = self._downloader.params.get('format_limit', None)
-            if format_limit is not None and format_limit in self._available_formats:
-                format_list = self._available_formats[self._available_formats.index(format_limit):]
-            else:
-                format_list = self._available_formats
-            existing_formats = [x for x in format_list if x in url_map]
-            if len(existing_formats) == 0:
-                self._downloader.trouble(u'ERROR: no known formats available for video')
-                return
-            if req_format is None:
-                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
-            elif req_format == 'worst':
-                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
-            elif req_format == '-1':
-                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
-            else:
-                # Specific format
-                if req_format not in url_map:
-                    self._downloader.trouble(u'ERROR: requested format not available')
-                    return
-                video_url_list = [(req_format, url_map[req_format])] # Specific format
-
-        results = []
-        for format_param, video_real_url in video_url_list:
-            # Extension
-            video_extension = self._video_extensions.get(format_param, 'mp4')
-
-            results.append({
-                'id': video_id.decode('utf-8'),
-                'url': video_real_url.decode('utf-8'),
-                'uploader': video_uploader.decode('utf-8'),
-                'upload_date': upload_date,
-                'title': video_title,
-                'ext': video_extension.decode('utf-8'),
-                'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
-                'thumbnail': video_thumbnail.decode('utf-8'),
-                'description': video_description.decode('utf-8'),
-            })
-        return results
+        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
+        webpage = self._download_webpage(url, video_id)
+
+        BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
+        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
+        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
+        if not m:
+            raise ExtractorError(u'Cannot parse data')
+        data = dict(json.loads(m.group(1)))
+        params_raw = compat_urllib_parse.unquote(data['params'])
+        params = json.loads(params_raw)
+        video_url = params['hd_src']
+        video_duration = int(params['video_duration'])
+
+        m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
+        if not m:
+            raise ExtractorError(u'Cannot find title in webpage')
+        video_title = unescapeHTML(m.group(1))
+
+        info = {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'ext': 'mp4',
+            'duration': video_duration,
+            'thumbnail': params['thumbnail_src'],
+        }
+        return [info]

 class BlipTVIE(InfoExtractor):
     """Information extractor for blip.tv"""
@@ -2983,8 +2880,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         if mobj is None:
-            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
-            return
+            raise ExtractorError(u'Invalid URL: %s' % url)

         if mobj.group('course') and mobj.group('video'): # A specific video
             course = mobj.group('course')
@@ -3021,12 +2917,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
                 'upload_date': None,
             }

-            self.report_download_webpage(info['id'])
-            try:
-                coursepage = compat_urllib_request.urlopen(url).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
-                return
+            coursepage = self._download_webpage(url, info['id'],
+                                                note='Downloading course info page',
+                                                errnote='Unable to download course info page')

             m = re.search('<h1>([^<]+)</h1>', coursepage)
             if m:
@@ -3050,7 +2943,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
                 assert entry['type'] == 'reference'
                 results += self.extract(entry['url'])
             return results
-
         else: # Root page
             info = {
                 'id': 'Stanford OpenClassroom',
@@ -3927,8 +3819,6 @@ class PornotubeIE(InfoExtractor):
         return [info]

-
-
 class YouJizzIE(InfoExtractor):
     """Information extractor for youjizz.com."""
     _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
@@ -3975,6 +3865,49 @@ class YouJizzIE(InfoExtractor):
         return [info]

+class EightTracksIE(InfoExtractor):
+    IE_NAME = '8tracks'
+    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        playlist_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, playlist_id)
+
+        m = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", webpage, flags=re.DOTALL)
+        if not m:
+            raise ExtractorError(u'Cannot find trax information')
+        json_like = m.group(1)
+        data = json.loads(json_like)
+
+        session = str(random.randint(0, 1000000000))
+        mix_id = data['id']
+        track_count = data['tracks_count']
+        first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
+        next_url = first_url
+        res = []
+        for i in itertools.count():
+            api_json = self._download_webpage(next_url, playlist_id,
+                note=u'Downloading song information %s/%s' % (str(i+1), track_count),
+                errnote=u'Failed to download song information')
+            api_data = json.loads(api_json)
+            track_data = api_data[u'set']['track']
+            info = {
+                'id': track_data['id'],
+                'url': track_data['track_file_stream_url'],
+                'title': track_data['performer'] + u' - ' + track_data['name'],
+                'raw_title': track_data['name'],
+                'uploader_id': data['user']['login'],
+                'ext': 'm4a',
+            }
+            res.append(info)
+            if api_data['set']['at_last_track']:
+                break
+            next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
+        return res
+
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4021,6 +3954,7 @@ def gen_extractors():
         SteamIE(),
         UstreamIE(),
         RBMARadioIE(),
+        EightTracksIE(),
         GenericIE()
     ]
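The paging loop in EightTracksIE is the standard shape for sequential playlist APIs: fetch a page, collect the track, stop on an end flag, otherwise derive the next URL from the last response (which is why itertools was added to the imports). Its skeleton with the network call stubbed out (illustrative only, not youtube-dl API):

    import itertools

    pages = [{'track': 'a', 'at_last_track': False},
             {'track': 'b', 'at_last_track': False},
             {'track': 'c', 'at_last_track': True}]

    def fetch(i):
        return pages[i]  # stands in for _download_webpage + json.loads

    tracks = []
    for i in itertools.count():
        page = fetch(i)
        tracks.append(page['track'])
        if page['at_last_track']:
            break
    print(tracks)  # ['a', 'b', 'c']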

youtube_dl/PostProcessor.py
@@ -143,10 +143,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         more_opts = []
         if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
-            if self._preferredcodec == 'm4a' and filecodec == 'aac':
+            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                 # Lossless, but in another container
                 acodec = 'copy'
-                extension = self._preferredcodec
+                extension = 'm4a'
                 more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
             elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                 # Lossless if possible
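# Lossless if possible
The rewritten condition lets a 'best' audio-format preference take the lossless path for AAC sources as well: acodec 'copy' remuxes the stream instead of re-encoding it, the aac_adtstoasc bitstream filter converts raw ADTS AAC into the form MP4-family containers require, and the extension is pinned to 'm4a' because self._preferredcodec may now be 'best'.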

youtube_dl/__init__.py
@@ -150,6 +150,9 @@ def parseOpts():
     selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
     selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
     selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
+    selection.add_option('--min-filesize', metavar='SIZE', dest='min_filesize', help="Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)", default=None)
+    selection.add_option('--max-filesize', metavar='SIZE', dest='max_filesize', help="Do not download any videos larger than SIZE (e.g. 50k or 44.6m)", default=None)

     authentication.add_option('-u', '--username',
             dest='username', metavar='USERNAME', help='account username')
@@ -290,10 +293,13 @@ def _real_main():
     else:
         try:
             jar = compat_cookiejar.MozillaCookieJar(opts.cookiefile)
-            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
+            if os.access(opts.cookiefile, os.R_OK):
                 jar.load()
         except (IOError, OSError) as err:
-            sys.exit(u'ERROR: unable to open cookie file')
+            if opts.verbose:
+                traceback.print_exc()
+            sys.stderr.write(u'ERROR: unable to open cookie file\n')
+            sys.exit(101)

     # Set user agent
     if opts.user_agent is not None:
         std_headers['User-Agent'] = opts.user_agent
@@ -353,6 +359,16 @@ def _real_main():
         if numeric_limit is None:
             parser.error(u'invalid rate limit specified')
         opts.ratelimit = numeric_limit
+    if opts.min_filesize is not None:
+        numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
+        if numeric_limit is None:
+            parser.error(u'invalid min_filesize specified')
+        opts.min_filesize = numeric_limit
+    if opts.max_filesize is not None:
+        numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
+        if numeric_limit is None:
+            parser.error(u'invalid max_filesize specified')
+        opts.max_filesize = numeric_limit
     if opts.retries is not None:
         try:
             opts.retries = int(opts.retries)
@@ -442,6 +458,8 @@ def _real_main():
         'verbose': opts.verbose,
         'test': opts.test,
         'keepvideo': opts.keepvideo,
+        'min_filesize': opts.min_filesize,
+        'max_filesize': opts.max_filesize
         })

     if opts.verbose:
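Both new options are funneled through FileDownloader.parse_bytes, the same parser the rate-limit option uses, so the SIZE suffixes from the help text are powers of 1024. A sketch of that kind of suffix parser (the shipped implementation may differ in detail):

    import re

    def parse_bytes(bytestr):
        """Parse '50k' or '44.6m' into a byte count."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    print(parse_bytes('50k'))    # 51200
    print(parse_bytes('44.6m'))  # 46766490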

youtube_dl/utils.py
@@ -280,6 +280,12 @@ class AttrParser(compat_html_parser.HTMLParser):
                 lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
             lines[-1] = lines[-1][:self.result[2][1]]
             return '\n'.join(lines).strip()
+# Hack for https://github.com/rg3/youtube-dl/issues/662
+if sys.version_info < (2, 7, 3):
+    AttrParser.parse_endtag = (lambda self, i:
+        i + len("</scr'+'ipt>")
+        if self.rawdata[i:].startswith("</scr'+'ipt>")
+        else compat_html_parser.HTMLParser.parse_endtag(self, i))

 def get_element_by_id(id, html):
     """Return the content of the tag with the specified ID in the passed HTML document"""

youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2013.01.13'
+__version__ = '2013.02.02'