planet/planet/config.py
2007-03-07 13:51:23 -05:00

388 lines
14 KiB
Python

"""
Planet Configuration
This module encapsulates all planet configuration. This is not a generic
configuration parser, it knows everything about configuring a planet - from
the structure of the ini file, to knowledge of data types, even down to
what are the defaults.
Usage:
import config
config.load('config.ini')
# administrative / structural information
print config.template_files()
print config.subscriptions()
# planet wide configuration
print config.name()
print config.link()
# per template configuration
print config.days_per_page('atom.xml.tmpl')
print config.encoding('index.html.tmpl')
Todo:
* error handling (example: no planet section)
"""
import os, sys, re, urllib
from ConfigParser import ConfigParser
from urlparse import urljoin
# Module-wide parser holding the currently loaded configuration; load()
# rebinds it to a fresh ConfigParser before reading the ini file(s).
parser = ConfigParser()
# Option names that feed_options() leaves out of the per-feed dictionaries;
# the define_planet* helpers inside __init__() append every name they define.
planet_predefined_options = ['filters']
def __init__():
    """Define the structure of an ini file.

    Installs one accessor function per known option as an attribute of the
    `config` module (presumably this very module, imported by name).

    * Planet-level accessors take no section argument and read the option
      from [Planet], returning the built-in default when it is absent.
      Each such name is also appended to planet_predefined_options.
    * Template-level accessors take a section name; they look in that
      section first, then in [Planet], then use the built-in default.
    """
    import config

    # compiled once: captures the text before and after a #{var} placeholder
    wild = re.compile(r'^(.*)#{(\w+)}(.*)$')

    # get an option from a section, falling back to [Planet], then default
    def get(section, option, default):
        if section and parser.has_option(section, option):
            return parser.get(section, option)
        elif parser.has_option('Planet', option):
            if option == 'log_format':
                # raw=True skips ConfigParser interpolation; the value
                # itself is made of %(...)s fields (see the default below)
                return parser.get('Planet', option, raw=True)
            return parser.get('Planet', option)
        else:
            return default

    # expand #{var} in lists: a word holding a placeholder is replaced by
    # one word per subscription that has a non-empty value for var
    def expand(words):
        output = []
        for word in words.split():
            match = wild.match(word)
            if match:
                pre, var, post = match.groups()
                for sub in subscriptions():
                    value = feed_options(sub).get(var, None)
                    if value:
                        output.append(pre + value + post)
            else:
                output.append(word)
        return output

    # define a string planet-level variable
    def define_planet(name, default):
        setattr(config, name, lambda default=default: get(None,name,default))
        planet_predefined_options.append(name)

    # define an int planet-level variable
    def define_planet_int(name, default=0):
        setattr(config, name, lambda : int(get(None,name,default)))
        planet_predefined_options.append(name)

    # define a list planet-level variable (whitespace separated words)
    def define_planet_list(name, default=''):
        setattr(config, name, lambda : expand(get(None,name,default)))
        planet_predefined_options.append(name)

    # define a string template-level variable
    def define_tmpl(name, default):
        setattr(config, name, lambda section, default=default:
            get(section,name,default))

    # define an int template-level variable
    def define_tmpl_int(name, default):
        setattr(config, name, lambda section, default=default:
            int(get(section,name,default)))

    # planet wide options
    define_planet('name', "Unconfigured Planet")
    define_planet('link', '')
    define_planet('cache_directory', "cache")
    define_planet('log_level', "WARNING")
    define_planet('log_format', "%(levelname)s:%(name)s:%(message)s")
    define_planet('date_format', "%B %d, %Y %I:%M %p")
    define_planet('new_date_format', "%B %d, %Y")
    define_planet('generator', 'Venus')
    define_planet('generator_uri', 'http://intertwingly.net/code/venus/')
    define_planet('owner_name', 'Anonymous Coward')
    define_planet('owner_email', '')
    define_planet('output_theme', '')
    define_planet('output_dir', 'output')
    # NOTE(review): defined as a string option with an int default, so the
    # accessor yields 0 when unset but a string when configured.
    define_planet('spider_threads', 0)

    define_planet_int('new_feed_items', 0)
    define_planet_int('feed_timeout', 20)
    define_planet_int('cache_keep_entries', 10)

    define_planet_list('template_files')
    define_planet_list('bill_of_materials')
    define_planet_list('template_directories', '.')
    define_planet_list('filter_directories')

    # template options
    define_tmpl_int('days_per_page', 0)
    define_tmpl_int('items_per_page', 60)
    define_tmpl_int('activity_threshold', 0)
    define_tmpl('encoding', 'utf-8')
    define_tmpl('ignore_in_feed', '')
    define_tmpl('name_type', '')
    define_tmpl('title_type', '')
    define_tmpl('summary_type', '')
    define_tmpl('content_type', '')
    define_tmpl('future_dates', 'keep')
    define_tmpl('xml_base', '')
    define_tmpl('filter', None)
    define_tmpl('exclude', None)
def load(config_file):
    """Initialize the module-level parser and load a configuration.

    Reads config_file, then, in order:
      * merges in the theme named by output_theme (the theme's config.ini
        is read first so that config_file can override it),
      * adds the bundled 'filters' directory to filter_directories,
      * downloads every reading list and merges it into the parser.
    """
    global parser
    parser = ConfigParser()
    parser.read(config_file)

    import config
    import planet
    from planet import opml, foaf
    log = planet.getLogger(config.log_level(), config.log_format())

    # Theme support
    theme = config.output_theme()
    if theme:
        theme_roots = ("", os.path.join(sys.path[0], 'themes'))
        for root in theme_roots:
            theme_dir = os.path.join(root, theme)
            theme_file = os.path.join(theme_dir, 'config.ini')
            if not os.path.exists(theme_file):
                continue

            # initial search list for theme directories
            search_dirs = config.template_directories()
            if theme_dir not in search_dirs:
                search_dirs.append(theme_dir)
            if os.path.dirname(config_file) not in search_dirs:
                search_dirs.append(os.path.dirname(config_file))

            # read in the theme on its own
            parser = ConfigParser()
            parser.read(theme_file)
            materials = config.bill_of_materials()

            # complete search list: the theme's own template directories,
            # filtered against the list as it stood before any are added
            theme_subdirs = [os.path.join(theme_dir, subdir)
                for subdir in config.template_directories()
                if subdir not in search_dirs]
            search_dirs.extend(theme_subdirs)

            # merge configurations, allowing current one to override theme
            theme_templates = config.template_files()
            parser.set('Planet', 'template_files', '')
            parser.read(config_file)
            for item in config.bill_of_materials():
                if item not in materials:
                    materials.append(item)
            parser.set('Planet', 'bill_of_materials', ' '.join(materials))
            parser.set('Planet', 'template_directories',
                ' '.join(search_dirs))
            parser.set('Planet', 'template_files',
                ' '.join(theme_templates + config.template_files()))
            break
        else:
            # neither candidate location held a config.ini for this theme
            log.error('Unable to find theme %s', theme)

    # Filter support
    filter_dirs = config.filter_directories()
    bundled_filters = os.path.join(sys.path[0], 'filters')
    if bundled_filters not in filter_dirs and os.path.exists(bundled_filters):
        parser.set('Planet', 'filter_directories',
            ' '.join(filter_dirs + [bundled_filters]))

    # Reading list support
    pending_lists = config.reading_lists()
    if pending_lists:
        if not os.path.exists(config.cache_lists_directory()):
            os.makedirs(config.cache_lists_directory())

        # conversion callback; reads the loop variable reading_list below
        def data2config(data, cached_config):
            kind = content_type(reading_list)
            if kind.find('opml') >= 0:
                opml.opml2config(data, cached_config)
            elif kind.find('foaf') >= 0:
                foaf.foaf2config(data, cached_config)
            else:
                # any other content_type is run as a filter over the data
                from planet import shell
                import StringIO
                converted = shell.run(kind, data.getvalue(), mode="filter")
                cached_config.readfp(StringIO.StringIO(converted))

            # nothing beyond the list's own section: treat as a failure
            if cached_config.sections() in [[], [reading_list]]:
                raise Exception

        for reading_list in pending_lists:
            downloadReadingList(reading_list, parser, data2config)
def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True):
    """Fetch a reading list, convert it to ini form and merge it into a config.

    list        -- section name of the reading list in orig_config; also the
                   URL to fetch (resolved relative to the current directory)
    orig_config -- parser holding the list's section; receives the converted
                   result when re_read is true
    callback    -- called as callback(data, cached_config) to convert the
                   downloaded data (a StringIO); skipped when false
    use_cache   -- read etag/last-modified from, and write the result to, a
                   cache file under config.cache_lists_directory()
    re_read     -- merge the result back into orig_config

    Nothing is raised for ordinary errors: on any failure the previously
    cached copy (or the in-memory options) is used instead, and if that
    fails too the problem is logged.
    """
    from planet import logger
    import config
    try:

        import urllib2, StringIO
        from planet.spider import filename

        # list cache file name
        cache_filename = filename(config.cache_lists_directory(), list)

        # retrieve list options (e.g., etag, last-modified) from cache
        options = {}

        # add original options
        for key in orig_config.options(list):
            options[key] = orig_config.get(list, key)

        if use_cache:
            try:
                cached_config = ConfigParser()
                cached_config.read(cache_filename)
                for option in cached_config.options(list):
                    options[option] = cached_config.get(list, option)
            except Exception:
                # best effort: a missing or unusable cache just means the
                # original options are used on their own
                pass

        cached_config = ConfigParser()
        cached_config.add_section(list)
        for key, value in options.items():
            cached_config.set(list, key, value)

        # read list; a file: URL for the current directory is the base
        # against which the list name is resolved
        curdir = getattr(os.path, 'curdir', '.')
        if sys.platform.find('win') < 0:
            base = urljoin('file:', os.path.abspath(curdir))
        else:
            path = os.path.abspath(os.path.curdir)
            base = urljoin('file:///', path.replace(':','|').replace('\\','/'))

        request = urllib2.Request(urljoin(base + '/', list))
        if "etag" in options:
            request.add_header('If-None-Match', options['etag'])
        if "last-modified" in options:
            request.add_header('If-Modified-Since',
                options['last-modified'])
        response = urllib2.urlopen(request)
        try:
            if 'etag' in response.headers:
                cached_config.set(list, 'etag', response.headers['etag'])
            if 'last-modified' in response.headers:
                cached_config.set(list, 'last-modified',
                    response.headers['last-modified'])

            # convert to config.ini
            data = StringIO.StringIO(response.read())
        finally:
            response.close()

        if callback: callback(data, cached_config)

        # write to cache
        if use_cache:
            cache = open(cache_filename, 'w')
            try:
                cached_config.write(cache)
            finally:
                cache.close()

        # re-parse and proceed
        logger.debug("Using %s readinglist", list)
        if re_read:
            if use_cache:
                orig_config.read(cache_filename)
            else:
                cdata = StringIO.StringIO()
                cached_config.write(cdata)
                cdata.seek(0)
                orig_config.readfp(cdata)
    except Exception:
        # download or conversion failed: fall back to what we already have
        try:
            if re_read:
                if use_cache:
                    # read() returns the list of files it managed to parse
                    if not orig_config.read(cache_filename):
                        raise Exception("no cached copy of %s" % list)
                else:
                    cdata = StringIO.StringIO()
                    cached_config.write(cdata)
                    cdata.seek(0)
                    orig_config.readfp(cdata)
                logger.info("Using cached %s readinglist", list)
        except Exception:
            logger.exception("Unable to read %s readinglist", list)
def http_cache_directory():
    """Return the directory used for the HTTP cache.

    This is the [Planet] http_cache_directory option joined onto
    cache_directory(), or the "cache" subdirectory of cache_directory()
    when the option is not set.
    """
    if parser.has_option('Planet', 'http_cache_directory'):
        # the configured branch previously computed this path but never
        # returned it, so callers received None
        return os.path.join(cache_directory(),
            parser.get('Planet', 'http_cache_directory'))
    else:
        return os.path.join(cache_directory(), "cache")
def cache_sources_directory():
    """Return the sources cache directory, located under cache_directory().

    The subdirectory name comes from the [Planet] cache_sources_directory
    option and defaults to 'sources'.
    """
    subdir = 'sources'
    if parser.has_option('Planet', 'cache_sources_directory'):
        subdir = parser.get('Planet', 'cache_sources_directory')
    return os.path.join(cache_directory(), subdir)
def cache_lists_directory():
    """Return the directory in which reading lists are cached.

    A configured [Planet] cache_lists_directory is returned exactly as
    written (it is not joined onto cache_directory()); otherwise the
    'lists' subdirectory of cache_directory() is used.
    """
    if parser.has_option('Planet', 'cache_lists_directory'):
        # the configured value used to be looked up and then dropped,
        # so callers received None
        return parser.get('Planet', 'cache_lists_directory')
    else:
        return os.path.join(cache_directory(), 'lists')
def feed():
    """Return the URL of the planet's feed, or None if it cannot be found.

    An explicit [Planet] feed option wins.  Otherwise, provided link() is
    set, the first template file whose base name (extension removed)
    contains 'atom' or 'rss' is resolved against link().
    """
    if parser.has_option('Planet', 'feed'):
        return parser.get('Planet', 'feed')
    if not link():
        return None
    for template in template_files():
        stem, _extension = os.path.splitext(os.path.basename(template))
        if 'atom' in stem or 'rss' in stem:
            return urljoin(link(), stem)
    return None
def feedtype():
    """Return the type of the planet's feed: normally 'atom' or 'rss'.

    An explicit [Planet] feedtype option is returned as-is.  Otherwise the
    type is guessed from the feed() URL; None is returned when there is no
    feed or its URL names neither format.
    """
    if parser.has_option('Planet', 'feedtype'):
        # this value used to be looked up and dropped (missing return)
        return parser.get('Planet', 'feedtype')
    url = feed()
    if url:
        # 'atom' is tested first, as before, so it wins if both appear
        if 'atom' in url:
            return 'atom'
        if 'rss' in url:
            return 'rss'
    return None
def subscriptions():
    """ list the feed subscriptions

    Returns every section name of the configuration that is not 'Planet',
    a template file, a filter or a reading list, in parser order.
    """
    # A comprehension is used instead of the builtin filter(): the module
    # attribute `filter` is replaced by an option accessor in __init__(),
    # and reaching the builtin through __builtins__[...] only works while
    # __builtins__ happens to be a dict.
    candidates = [section for section in parser.sections()
        if section != 'Planet']
    if not candidates:
        return []
    # computed once rather than once per section
    reserved = template_files() + filters() + reading_lists()
    return [section for section in candidates if section not in reserved]
def reading_lists():
    """ list of lists of feed subscriptions

    A section counts as a reading list when it has a content_type option
    whose value contains 'opml', 'foaf' or a '.'.
    """
    markers = ('opml', 'foaf', '.')
    found = []
    for section in parser.sections():
        if not parser.has_option(section, 'content_type'):
            continue
        kind = parser.get(section, 'content_type')
        for marker in markers:
            if marker in kind:
                found.append(section)
                break
    return found
def filters(section=None):
    """Return the list of filters that apply to a section.

    The list is built from, in order: the [Planet] filters option, the
    section's own filters option (when a section is given), and
    regexp_sifter.py entries generated from the `filter` (require) and
    `exclude` options.
    """
    result = []
    if parser.has_option('Planet', 'filters'):
        result += parser.get('Planet', 'filters').split()
    if section and parser.has_option(section, 'filters'):
        result += parser.get(section, 'filters').split()
    # filter() and exclude() are the option accessors installed by
    # __init__() (define_tmpl), not the builtin filter
    required = filter(section)
    if required:
        result.append('regexp_sifter.py?require=' + urllib.quote(required))
    excluded = exclude(section)
    if excluded:
        # quote the exclude pattern itself; the require pattern used to be
        # quoted here by mistake
        result.append('regexp_sifter.py?exclude=' + urllib.quote(excluded))
    return result
def planet_options():
    """ dictionary of planet wide options

    Maps every option name in [Planet] to its value; log_format is read
    raw (without interpolation), all other options are interpolated.
    """
    pairs = []
    for opt in parser.options('Planet'):
        value = parser.get('Planet', opt, raw=(opt == "log_format"))
        pairs.append((opt, value))
    return dict(pairs)
def feed_options(section):
    """ dictionary of feed specific options

    Starts from the planet wide options whose names are not listed in
    planet_predefined_options, then lets the options of `section` (if the
    section exists) override them.
    """
    import config
    options = {}
    for key, value in planet_options().items():
        if key not in planet_predefined_options:
            options[key] = value
    if parser.has_section(section):
        for opt in parser.options(section):
            options[opt] = parser.get(section, opt)
    return options
def template_options(section):
    """ dictionary of template specific options

    Computed exactly like feed_options() for the same section.
    """
    options = feed_options(section)
    return options
def write(file=sys.stdout):
    """ write out the current configuration in ini format

    file -- writable file object; defaults to the sys.stdout that was in
            place when this function was defined
    """
    # ConfigParser.write() returns None; printing its result used to append
    # a stray "None" line to standard output
    parser.write(file)