planet/planet/config.py

"""
Planet Configuration

This module encapsulates all planet configuration.  It is not a generic
configuration parser: it knows everything about configuring a planet, from
the structure of the ini file, to knowledge of data types, down to the
default values.

Usage:
  import config
  config.load('config.ini')

  # administrative / structural information
  print config.template_files()
  print config.subscriptions()

  # planet wide configuration
  print config.name()
  print config.link()

  # per template configuration
  print config.days_per_page('atom.xml.tmpl')
  print config.encoding('index.html.tmpl')

Todo:
  * error handling (example: no planet section)
"""

import os, sys, re, urllib
from ConfigParser import ConfigParser
from urlparse import urljoin

# Module-wide parser holding the currently loaded configuration.  load()
# rebinds this name to a fresh ConfigParser every time it is called.
parser = ConfigParser()

# Names of [Planet] options that feed_options() leaves out of the per-feed
# dictionaries.  The define_planet* helpers inside __init__() append to it.
planet_predefined_options = ['filters']

def __init__():
    """Define the structure of an ini file.

    Installs one accessor function per known option as an attribute of
    this module (via setattr on the imported ``config`` module):

      * planet-level accessors take no argument and read [Planet];
      * template-level accessors take a section name, and fall back to
        [Planet] and then to the built-in default.

    Nothing is read from the parser here; each accessor looks the value
    up in the current module-level ``parser`` when it is called.
    """
    import config

    # one list entry containing a '#{var}' placeholder; compiled once
    # rather than on every expand() call
    wild = re.compile(r'^(.*)#{(\w+)}(.*)$')

    # get an option from a section, falling back to [Planet], then default
    def get(section, option, default):
        if section and parser.has_option(section, option):
            return parser.get(section, option)
        if parser.has_option('Planet', option):
            # log_format carries %(name)s logging placeholders which must
            # not be interpolated by ConfigParser, hence raw for it only
            return parser.get('Planet', option, raw=(option == 'log_format'))
        return default

    # expand #{var} in lists: an entry containing the placeholder is
    # emitted once per subscription that has a non-empty value for var
    def expand(names):
        output = []
        for entry in names.split():
            match = wild.match(entry)
            if not match:
                output.append(entry)
                continue
            pre, var, post = match.groups()
            for sub in subscriptions():
                value = feed_options(sub).get(var, None)
                if value:
                    output.append(pre + value + post)
        return output

    # define a string planet-level variable
    def define_planet(name, default):
        setattr(config, name, lambda default=default: get(None,name,default))
        planet_predefined_options.append(name)

    # define an int planet-level variable
    def define_planet_int(name, default=0):
        setattr(config, name, lambda : int(get(None,name,default)))
        planet_predefined_options.append(name)

    # define a list planet-level variable (whitespace separated)
    def define_planet_list(name, default=''):
        setattr(config, name, lambda : expand(get(None,name,default)))
        planet_predefined_options.append(name)

    # define a string template-level variable
    def define_tmpl(name, default):
        setattr(config, name, lambda section, default=default:
            get(section,name,default))

    # define an int template-level variable
    def define_tmpl_int(name, default):
        setattr(config, name, lambda section, default=default:
            int(get(section,name,default)))

    # planet wide options
    define_planet('name', "Unconfigured Planet")
    define_planet('link', '')
    define_planet('cache_directory', "cache")
    define_planet('log_level', "WARNING")
    define_planet('log_format', "%(levelname)s:%(name)s:%(message)s")
    define_planet('date_format', "%B %d, %Y %I:%M %p")
    define_planet('new_date_format', "%B %d, %Y")
    define_planet('generator', 'Venus')
    define_planet('generator_uri', 'http://intertwingly.net/code/venus/')
    define_planet('owner_name', 'Anonymous Coward')
    define_planet('owner_email', '')
    define_planet('output_theme', '')
    define_planet('output_dir', 'output')
    define_planet('spider_threads', 0)

    define_planet_int('new_feed_items', 0)
    define_planet_int('feed_timeout', 20)
    define_planet_int('cache_keep_entries', 10)

    define_planet_list('template_files')
    define_planet_list('bill_of_materials')
    define_planet_list('template_directories', '.')
    define_planet_list('filter_directories')

    # template options
    define_tmpl_int('days_per_page', 0)
    define_tmpl_int('items_per_page', 60)
    define_tmpl_int('activity_threshold', 0)
    define_tmpl('encoding', 'utf-8')
    define_tmpl('ignore_in_feed', '')
    define_tmpl('name_type', '')
    define_tmpl('title_type', '')
    define_tmpl('summary_type', '')
    # content_type used to be defined twice (defaults 'utf-8', then '');
    # the later definition replaced the earlier one, so only the
    # effective default '' is kept
    define_tmpl('content_type', '')
    define_tmpl('future_dates', 'keep')
    define_tmpl('xml_base', '')
    define_tmpl('filter', None)
    define_tmpl('exclude', None)

def load(config_file):
    """Initialize the module-level parser and load a configuration.

    config_file is passed to ConfigParser.read().  After reading it, this
    function applies the output theme (if one is configured), adds the
    'filters' directory found beside sys.path[0] (if it exists), and
    downloads and merges every reading list section.
    """
    global parser
    parser = ConfigParser()
    parser.read(config_file)

    import config, planet
    from planet import opml, foaf
    log = planet.getLogger(config.log_level(),config.log_format())

    # Theme support
    theme = config.output_theme()
    if theme:
        # try the theme name as given first, then under sys.path[0]/themes;
        # the first location containing a config.ini wins
        for path in ("", os.path.join(sys.path[0],'themes')):
            theme_dir = os.path.join(path,theme)
            theme_file = os.path.join(theme_dir,'config.ini')
            if os.path.exists(theme_file):
                # initial search list for theme directories
                dirs = config.template_directories()
                if theme_dir not in dirs:
                    dirs.append(theme_dir)
                if os.path.dirname(config_file) not in dirs:
                    dirs.append(os.path.dirname(config_file))

                # read in the theme
                # (the global parser now holds ONLY the theme's settings, so
                # the accessors below report the theme's values)
                parser = ConfigParser()
                parser.read(theme_file)
                bom = config.bill_of_materials()

                # complete search list for theme directories
                # (the theme's own template_directories, joined onto theme_dir)
                dirs += [os.path.join(theme_dir,dir) for dir in
                    config.template_directories() if dir not in dirs]

                # merge configurations, allowing current one to override theme
                # remember the theme's templates, then blank the option so
                # that after re-reading config_file it holds only what that
                # file defines (or nothing)
                template_files = config.template_files()
                parser.set('Planet','template_files','')
                parser.read(config_file)
                for file in config.bill_of_materials():
                    if not file in bom: bom.append(file)
                parser.set('Planet', 'bill_of_materials', ' '.join(bom))
                parser.set('Planet', 'template_directories', ' '.join(dirs))
                # theme templates come first, followed by config_file's own
                parser.set('Planet', 'template_files',
                   ' '.join(template_files + config.template_files()))
                break
        else:
            # for/else: reached only when no candidate directory had a
            # config.ini (the loop ended without break)
            log.error('Unable to find theme %s', theme)

    # Filter support
    # add the 'filters' directory beside sys.path[0] to the search list,
    # provided it exists and is not listed already
    dirs = config.filter_directories()
    filter_dir = os.path.join(sys.path[0],'filters')
    if filter_dir not in dirs and os.path.exists(filter_dir):
        parser.set('Planet', 'filter_directories', ' '.join(dirs+[filter_dir]))

    # Reading list support
    reading_lists = config.reading_lists()
    if reading_lists:
        if not os.path.exists(config.cache_lists_directory()):
            os.makedirs(config.cache_lists_directory())

        # callback handed to downloadReadingList: converts the downloaded
        # data into sections of cached_config.
        # NOTE: `list` is not a parameter; it is the loop variable of the
        # `for list in reading_lists` loop below, read when the callback
        # runs.  `content_type` is the module-level accessor for the
        # content_type option.
        def data2config(data, cached_config):
                if content_type(list).find('opml')>=0:
                    opml.opml2config(data, cached_config)
                elif content_type(list).find('foaf')>=0:
                    foaf.foaf2config(data, cached_config)
                else:
                    # any other content_type is passed to shell.run as the
                    # thing to run, in "filter" mode, with the downloaded
                    # data; its output is parsed as ini text
                    from planet import shell
                    import StringIO
                    cached_config.readfp(StringIO.StringIO(shell.run(
                        content_type(list), data.getvalue(), mode="filter")))

                # no sections at all, or only the list's own section, means
                # the conversion produced nothing: signal failure
                if cached_config.sections() in [[], [list]]:
                    raise Exception

        for list in reading_lists:
            downloadReadingList(list, parser, data2config)

def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=True):
    """Fetch a reading list, convert it, and merge it into a configuration.

    list        -- name of the reading list section in orig_config; it is
                   also the URL to fetch (relative names are resolved
                   against a file: URL for the current directory)
    orig_config -- ConfigParser holding the [list] section; receives the
                   converted result when re_read is true
    callback    -- callable(data, cached_config) converting the downloaded
                   data (a StringIO) into sections of cached_config; skipped
                   when false
    use_cache   -- read previous options from, and write the result to,
                   the list's file under config.cache_lists_directory()
    re_read     -- merge the converted result back into orig_config

    Returns None.  Ordinary errors (network failure, HTTP errors, a
    callback that raises) are not propagated: the previously cached copy
    is used instead, and if that is unavailable too the problem is logged.
    KeyboardInterrupt and SystemExit are allowed to propagate.
    """
    from planet import logger
    import config
    try:

        import urllib2, StringIO
        from planet.spider import filename

        # list cache file name
        cache_filename = filename(config.cache_lists_directory(), list)

        # start from the options of the list's own section ...
        options = {}
        for key in orig_config.options(list):
            options[key] = orig_config.get(list, key)

        # ... and overlay the options remembered in the cache
        # (e.g., etag, last-modified)
        try:
            if use_cache:
                cached_config = ConfigParser()
                cached_config.read(cache_filename)
                for option in cached_config.options(list):
                    options[option] = cached_config.get(list, option)
        except Exception:
            # best effort: a missing or section-less cache file just means
            # there is nothing to overlay
            pass

        cached_config = ConfigParser()
        cached_config.add_section(list)
        for key, value in options.items():
            cached_config.set(list, key, value)

        # read list
        curdir = getattr(os.path, 'curdir', '.')
        # startswith, not a substring search: 'darwin' and 'cygwin' also
        # contain 'win' but must not get the drive-letter rewriting
        if sys.platform.startswith('win'):
            path = os.path.abspath(curdir)
            base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
        else:
            base = urljoin('file:', os.path.abspath(curdir))

        # conditional request using whatever validators were cached
        request = urllib2.Request(urljoin(base + '/', list))
        if "etag" in options:
            request.add_header('If-None-Match', options['etag'])
        if "last-modified" in options:
            request.add_header('If-Modified-Since',
                options['last-modified'])
        response = urllib2.urlopen(request)
        if 'etag' in response.headers:
            cached_config.set(list, 'etag', response.headers['etag'])
        if 'last-modified' in response.headers:
            cached_config.set(list, 'last-modified',
                response.headers['last-modified'])

        # convert to config.ini
        data = StringIO.StringIO(response.read())

        if callback: callback(data, cached_config)

        # write to cache
        if use_cache:
            cache = open(cache_filename, 'w')
            try:
                cached_config.write(cache)
            finally:
                # do not leak the handle when the write fails
                cache.close()

        # re-parse and proceed
        logger.debug("Using %s readinglist", list)
        if re_read:
            if use_cache:
                orig_config.read(cache_filename)
            else:
                cdata = StringIO.StringIO()
                cached_config.write(cdata)
                cdata.seek(0)
                orig_config.readfp(cdata)
    except Exception:
        # anything went wrong above: fall back to what is already cached
        try:
            if re_read:
                if use_cache:
                    # read() returns the list of files parsed; empty means
                    # there is no usable cache either
                    if not orig_config.read(cache_filename): raise Exception()
                else:
                    cdata = StringIO.StringIO()
                    cached_config.write(cdata)
                    cdata.seek(0)
                    orig_config.readfp(cdata)
                logger.info("Using cached %s readinglist", list)
        except Exception:
            # also covers names left unbound by an early failure above
            logger.exception("Unable to read %s readinglist", list)

def http_cache_directory():
    """Return the directory used for the HTTP cache.

    This is the [Planet] http_cache_directory option, or "cache" when the
    option is absent, joined onto cache_directory().
    """
    if parser.has_option('Planet', 'http_cache_directory'):
        # the result used to be computed and then dropped (no return), so
        # a configured value made this function return None
        return os.path.join(cache_directory(),
            parser.get('Planet', 'http_cache_directory'))
    else:
        return os.path.join(cache_directory(), "cache")

def cache_sources_directory():
    """Return the directory where per-source data is cached.

    This is the [Planet] cache_sources_directory option, or 'sources'
    when the option is absent, joined onto cache_directory().
    """
    if not parser.has_option('Planet', 'cache_sources_directory'):
        return os.path.join(cache_directory(), 'sources')
    return os.path.join(cache_directory(),
        parser.get('Planet', 'cache_sources_directory'))

def cache_lists_directory():
    """Return the directory where reading lists are cached.

    A [Planet] cache_lists_directory option is returned as configured;
    without it the default is the 'lists' directory under
    cache_directory().
    """
    if parser.has_option('Planet', 'cache_lists_directory'):
        # the value used to be looked up and then dropped (no return), so
        # a configured value made this function return None
        return parser.get('Planet', 'cache_lists_directory')
    else:
        return os.path.join(cache_directory(), 'lists')

def feed():
    """Return the URL of the planet's own feed, or None.

    An explicit [Planet] feed option wins.  Otherwise, when link() is set,
    the first template file whose base name (last extension removed)
    contains 'atom' or 'rss' is joined onto link().
    """
    if parser.has_option('Planet', 'feed'):
        return parser.get('Planet', 'feed')
    if not link():
        return None
    for template_file in template_files():
        stem = os.path.splitext(os.path.basename(template_file))[0]
        if 'atom' in stem or 'rss' in stem:
            return urljoin(link(), stem)
    return None

def feedtype():
    """Return the type of the planet's own feed.

    An explicit [Planet] feedtype option wins.  Otherwise the type is
    guessed from feed(): 'atom' or 'rss' when the URL contains that word
    ('atom' is checked first).  Returns None when neither applies.
    """
    if parser.has_option('Planet', 'feedtype'):
        # the value used to be looked up and then dropped (no return), so
        # a configured feedtype made this function return None
        return parser.get('Planet', 'feedtype')

    # look the feed URL up once instead of once per test
    url = feed()
    if url:
        if url.find('atom') >= 0:
            return 'atom'
        if url.find('rss') >= 0:
            return 'rss'
    return None

def subscriptions():
    """ list the feed subscriptions

    A subscription is every section that is not 'Planet', a template
    file, a filter, or a reading list.
    """
    # Built once rather than once per section.  A list comprehension is
    # used because the name `filter` in this module is rebound to a
    # configuration accessor, and reaching the builtin through
    # __builtins__[...] only works while __builtins__ happens to be a dict.
    excluded = ['Planet'] + template_files() + filters() + reading_lists()
    return [section for section in parser.sections()
        if section not in excluded]

def reading_lists():
    """ list of lists of feed subscriptions

    A section counts as a reading list when it has a content_type option
    whose value contains 'opml', 'foaf' or a '.'.
    """
    found = []
    for section in parser.sections():
        if not parser.has_option(section, 'content_type'):
            continue
        kind = parser.get(section, 'content_type')
        if 'opml' in kind or 'foaf' in kind or '.' in kind:
            found.append(section)
    return found

def filters(section=None):
    """Return the list of filters that apply to a section.

    The list holds the [Planet] filters, then the section's own filters
    (when a section is given), then one regexp_sifter.py entry for each
    of the section's filter (require) and exclude patterns that is set.
    """
    result = []
    if parser.has_option('Planet', 'filters'):
        result += parser.get('Planet', 'filters').split()
    if section and parser.has_option(section, 'filters'):
        result += parser.get(section, 'filters').split()

    # NOTE: `filter` and `exclude` are this module's configuration
    # accessors for the options of the same name, not the builtin filter.
    required = filter(section)
    if required:
        result.append('regexp_sifter.py?require=' +
            urllib.quote(required))
    excluded = exclude(section)
    if excluded:
        # quote the exclude pattern itself; this used to quote the
        # filter (require) pattern, which is the wrong value and fails
        # outright when no filter pattern is configured
        result.append('regexp_sifter.py?exclude=' +
            urllib.quote(excluded))
    return result

def planet_options():
    """ dictionary of planet wide options

    Every option of the [Planet] section, keyed by option name.  The
    log_format value is fetched raw (no ConfigParser interpolation).
    """
    options = {}
    for opt in parser.options('Planet'):
        options[opt] = parser.get('Planet', opt, raw=(opt=="log_format"))
    return options

def feed_options(section):
    """ dictionary of feed specific options

    Starts from the planet wide options whose names are not listed in
    planet_predefined_options, then overlays the options of the given
    section when that section exists.
    """
    # (the unused function-level `import config` was dropped)
    options = dict([(key, value) for key, value in planet_options().items()
        if key not in planet_predefined_options])
    if parser.has_section(section):
        for opt in parser.options(section):
            options[opt] = parser.get(section, opt)
    return options

def template_options(section):
    """ dictionary of template specific options

    Templates are looked up exactly like feeds, so this simply delegates
    to feed_options().
    """
    options = feed_options(section)
    return options

def write(file=sys.stdout):
    """ write out the current configuration

    file -- writable file object receiving the ini text; defaults to the
            sys.stdout that was current when this module was loaded
    """
    # ConfigParser.write() returns None; printing its result used to emit
    # a stray "None" line on standard output
    parser.write(file)