186 lines
6.8 KiB
Python
186 lines
6.8 KiB
Python
""" Splice together a planet from a cache of feed entries """
|
|
import glob, os, time, shutil
|
|
from xml.dom import minidom
|
|
import planet, config, feedparser, reconstitute, shell
|
|
from reconstitute import createTextElement, date
|
|
from spider import filename
|
|
from planet import idindex
|
|
|
|
def splice():
|
|
""" Splice together a planet from a cache of entries """
|
|
import planet
|
|
log = planet.logger
|
|
|
|
log.info("Loading cached data")
|
|
cache = config.cache_directory()
|
|
dir=[(os.stat(file).st_mtime,file) for file in glob.glob(cache+"/*")
|
|
if not os.path.isdir(file)]
|
|
dir.sort()
|
|
dir.reverse()
|
|
|
|
max_items=max([config.items_per_page(templ)
|
|
for templ in config.template_files() or ['Planet']])
|
|
|
|
doc = minidom.parseString('<feed xmlns="http://www.w3.org/2005/Atom"/>')
|
|
feed = doc.documentElement
|
|
|
|
# insert feed information
|
|
createTextElement(feed, 'title', config.name())
|
|
date(feed, 'updated', time.gmtime())
|
|
gen = createTextElement(feed, 'generator', config.generator())
|
|
gen.setAttribute('uri', config.generator_uri())
|
|
|
|
author = doc.createElement('author')
|
|
createTextElement(author, 'name', config.owner_name())
|
|
createTextElement(author, 'email', config.owner_email())
|
|
feed.appendChild(author)
|
|
|
|
if config.feed():
|
|
createTextElement(feed, 'id', config.feed())
|
|
link = doc.createElement('link')
|
|
link.setAttribute('rel', 'self')
|
|
link.setAttribute('href', config.feed())
|
|
if config.feedtype():
|
|
link.setAttribute('type', "application/%s+xml" % config.feedtype())
|
|
feed.appendChild(link)
|
|
|
|
if config.link():
|
|
link = doc.createElement('link')
|
|
link.setAttribute('rel', 'alternate')
|
|
link.setAttribute('href', config.link())
|
|
feed.appendChild(link)
|
|
|
|
# insert subscription information
|
|
sub_ids = []
|
|
feed.setAttribute('xmlns:planet',planet.xmlns)
|
|
sources = config.cache_sources_directory()
|
|
for sub in config.subscriptions():
|
|
data=feedparser.parse(filename(sources,sub))
|
|
if data.feed.has_key('id'): sub_ids.append(data.feed.id)
|
|
if not data.feed: continue
|
|
|
|
# warn on missing links
|
|
if not data.feed.has_key('planet_message'):
|
|
if not data.feed.has_key('links'): data.feed['links'] = []
|
|
|
|
for link in data.feed.links:
|
|
if link.rel == 'self': break
|
|
else:
|
|
log.debug('missing self link for ' + sub)
|
|
|
|
for link in data.feed.links:
|
|
if link.rel == 'alternate' and 'html' in link.type: break
|
|
else:
|
|
log.debug('missing html link for ' + sub)
|
|
|
|
xdoc=minidom.parseString('''<planet:source xmlns:planet="%s"
|
|
xmlns="http://www.w3.org/2005/Atom"/>\n''' % planet.xmlns)
|
|
reconstitute.source(xdoc.documentElement, data.feed, None, None)
|
|
feed.appendChild(xdoc.documentElement)
|
|
|
|
index = idindex.open()
|
|
|
|
# insert entry information
|
|
items = 0
|
|
count = {}
|
|
atomNS='http://www.w3.org/2005/Atom'
|
|
new_feed_items = config.new_feed_items()
|
|
for mtime,file in dir:
|
|
if index != None:
|
|
base = os.path.basename(file)
|
|
if index.has_key(base) and index[base] not in sub_ids: continue
|
|
|
|
try:
|
|
entry=minidom.parse(file)
|
|
|
|
# verify that this entry is currently subscribed to and that the
|
|
# number of entries contributed by this feed does not exceed
|
|
# config.new_feed_items
|
|
entry.normalize()
|
|
sources = entry.getElementsByTagNameNS(atomNS, 'source')
|
|
if sources:
|
|
ids = sources[0].getElementsByTagName('id')
|
|
if ids:
|
|
id = ids[0].childNodes[0].nodeValue
|
|
count[id] = count.get(id,0) + 1
|
|
if new_feed_items and count[id] > new_feed_items: continue
|
|
|
|
if id not in sub_ids:
|
|
ids = sources[0].getElementsByTagName('planet:id')
|
|
if not ids: continue
|
|
id = ids[0].childNodes[0].nodeValue
|
|
if id not in sub_ids:
|
|
log.warn('Skipping: ' + id)
|
|
if id not in sub_ids: continue
|
|
|
|
# add entry to feed
|
|
feed.appendChild(entry.documentElement)
|
|
items = items + 1
|
|
if items >= max_items: break
|
|
except:
|
|
log.error("Error parsing %s", file)
|
|
|
|
if index: index.close()
|
|
|
|
return doc
|
|
|
|
def apply(doc):
|
|
output_dir = config.output_dir()
|
|
if not os.path.exists(output_dir): os.makedirs(output_dir)
|
|
log = planet.logger
|
|
|
|
planet_filters = config.filters('Planet')
|
|
|
|
# Go-go-gadget-template
|
|
for template_file in config.template_files():
|
|
output_file = shell.run(template_file, doc)
|
|
|
|
# run any template specific filters
|
|
if config.filters(template_file) != planet_filters:
|
|
output = open(output_file).read()
|
|
for filter in config.filters(template_file):
|
|
if filter in planet_filters: continue
|
|
if filter.find('>')>0:
|
|
# tee'd output
|
|
filter,dest = filter.split('>',1)
|
|
tee = shell.run(filter.strip(), output, mode="filter")
|
|
if tee:
|
|
output_dir = planet.config.output_dir()
|
|
dest_file = os.path.join(output_dir, dest.strip())
|
|
dest_file = open(dest_file,'w')
|
|
dest_file.write(tee)
|
|
dest_file.close()
|
|
else:
|
|
# pipe'd output
|
|
output = shell.run(filter, output, mode="filter")
|
|
if not output:
|
|
os.unlink(output_file)
|
|
break
|
|
else:
|
|
handle = open(output_file,'w')
|
|
handle.write(output)
|
|
handle.close()
|
|
|
|
# Process bill of materials
|
|
for copy_file in config.bill_of_materials():
|
|
dest = os.path.join(output_dir, copy_file)
|
|
for template_dir in config.template_directories():
|
|
source = os.path.join(template_dir, copy_file)
|
|
if os.path.exists(source): break
|
|
else:
|
|
log.error('Unable to locate %s', copy_file)
|
|
log.info("Template search path:")
|
|
for template_dir in config.template_directories():
|
|
log.info(" %s", os.path.realpath(template_dir))
|
|
continue
|
|
|
|
mtime = os.stat(source).st_mtime
|
|
if not os.path.exists(dest) or os.stat(dest).st_mtime < mtime:
|
|
dest_dir = os.path.split(dest)[0]
|
|
if not os.path.exists(dest_dir): os.makedirs(dest_dir)
|
|
|
|
log.info("Copying %s to %s", source, dest)
|
|
if os.path.exists(dest): os.chmod(dest, 0644)
|
|
shutil.copyfile(source, dest)
|
|
shutil.copystat(source, dest)
|