From 3e1c9bcb3eceec96005e6ed939fd16a769314ab8 Mon Sep 17 00:00:00 2001
From: Sam Ruby filters
+can be found
@@ -148,6 +151,7 @@ processed as templates. With Planet 2.0,
it is possible to override parameters like
items_per_page
on a per template basis, but at the current time Planet Venus doesn't
implement this.
Filters can be defined on a per-template basis, and will be used to post-process the output of the template.
[
filter]
Sections which are listed in [planet] filters
are
diff --git a/docs/filters.html b/docs/filters.html
index 865aa41..425d189 100644
--- a/docs/filters.html
+++ b/docs/filters.html
@@ -15,11 +15,13 @@ Anything written to stderr
is logged as an ERROR message. If no
stdout
is produced, the entry is not written to the cache or
processed further; in fact, if the entry had previously been written to the cache, it will be removed.
Input to a filter is a aggressively +
There are two types of filters supported by Venus, input and template.
+Input to an input filter is an aggressively normalized entry. For example, if a feed is RSS 1.0 with 10 items, the filter will be called ten times, each with a single Atom 1.0 entry, with all textConstructs expressed as XHTML, and everything encoded as UTF-8.
+Input to a template filter will be the output produced by the template.
You will find a small set of example filters in the filters directory. The
Notes
-
[planet]
section of your config.ini
+will be invoked on all feeds. Filters listed in individual
+[feed]
sections will only be invoked on those feeds.
+Filters listed in [template]
sections will be invoked on the
+output of that template.[planet]
section of your config.ini
-will be invoked on all feeds. Filters listed in individual
-[feed]
sections will only be invoked on those feeds..py
invokes
python. .xslt
involkes XSLT. .sed
and
.tmpl
(a.k.a. htmltmp) are also options. Other languages, like
diff --git a/planet/shell/__init__.py b/planet/shell/__init__.py
index 18c764a..7052454 100644
--- a/planet/shell/__init__.py
+++ b/planet/shell/__init__.py
@@ -30,7 +30,7 @@ def run(template_file, doc, mode='template'):
if not mode in logged_modes:
log.info("%s search path:", mode)
for template_dir in dirs:
- log.info(" %s", os.path.realpath(template_dir))
+ log.error(" %s", os.path.realpath(template_dir))
logged_modes.append(mode)
return
template_resolved = os.path.realpath(template_resolved)
@@ -60,3 +60,4 @@ def run(template_file, doc, mode='template'):
output_dir = planet.config.output_dir()
output_file = os.path.join(output_dir, base)
module.run(template_resolved, doc, output_file, options)
+ return output_file
diff --git a/planet/shell/tmpl.py b/planet/shell/tmpl.py
index e6d3745..4f4d822 100644
--- a/planet/shell/tmpl.py
+++ b/planet/shell/tmpl.py
@@ -241,12 +241,15 @@ def run(script, doc, output_file=None, options={}):
for key,value in template_info(doc).items():
tp.set(key, value)
- reluri = os.path.splitext(os.path.basename(output_file))[0]
- tp.set('url', urlparse.urljoin(config.link(),reluri))
+ if output_file:
+ reluri = os.path.splitext(os.path.basename(output_file))[0]
+ tp.set('url', urlparse.urljoin(config.link(),reluri))
- output = open(output_file, "w")
- output.write(tp.process(template))
- output.close()
+ output = open(output_file, "w")
+ output.write(tp.process(template))
+ output.close()
+ else:
+ return tp.process(template)
if __name__ == '__main__':
sys.path.insert(0, os.path.split(sys.path[0])[0])
diff --git a/planet/splice.py b/planet/splice.py
index b5e9d45..26aa97d 100644
--- a/planet/splice.py
+++ b/planet/splice.py
@@ -111,9 +111,25 @@ def apply(doc):
if not os.path.exists(output_dir): os.makedirs(output_dir)
log = planet.getLogger(config.log_level(),config.log_format())
+ planet_filters = config.filters('Planet')
+
# Go-go-gadget-template
for template_file in config.template_files():
- shell.run(template_file, doc)
+ output_file = shell.run(template_file, doc)
+
+ # run any template specific filters
+ if config.filters(template_file) != planet_filters:
+ output = open(output_file).read()
+ for filter in config.filters(template_file):
+ if filter in planet_filters: continue
+ output = shell.run(filter, output, mode="filter")
+ if not output:
+ os.unlink(output_file)
+ break
+ else:
+ handle = open(output_file,'w')
+ handle.write(output)
+ handle.close()
# Process bill of materials
for copy_file in config.bill_of_materials():
diff --git a/tests/data/apply/config-filter.ini b/tests/data/apply/config-filter.ini
new file mode 100644
index 0000000..6bea6db
--- /dev/null
+++ b/tests/data/apply/config-filter.ini
@@ -0,0 +1,21 @@
+[Planet]
+output_theme = asf
+output_dir = tests/work/apply
+name = test planet
+cache_directory = tests/work/spider/cache
+filter_directories = tests/data/apply
+
+[index.html.xslt]
+filters = rebase.py?base=http://example.com/
+
+[tests/data/spider/testfeed0.atom]
+name = not found
+
+[tests/data/spider/testfeed1b.atom]
+name = one
+
+[tests/data/spider/testfeed2.atom]
+name = two
+
+[tests/data/spider/testfeed3.rss]
+name = three
diff --git a/tests/data/apply/rebase.py b/tests/data/apply/rebase.py
new file mode 100644
index 0000000..9cd77d1
--- /dev/null
+++ b/tests/data/apply/rebase.py
@@ -0,0 +1,24 @@
+# make href attributes absolute, using base argument passed in
+
+import sys
+try:
+    base = sys.argv[sys.argv.index('--base')+1]  # value that follows the --base flag
+except (ValueError, IndexError):  # --base is absent, or is the last argument
+    sys.stderr.write('Missing required argument: base\n')
+    sys.exit()
+
+from xml.dom import minidom, Node
+from urlparse import urljoin
+
+def rebase(node, newbase):
+    """Make href on node and on every descendant element absolute against newbase."""
+    if node.hasAttribute('href'):
+        # urljoin leaves an already-absolute href unchanged
+        node.setAttribute('href', urljoin(newbase, node.getAttribute('href')))
+    for child in node.childNodes:
+        if child.nodeType == Node.ELEMENT_NODE:
+            rebase(child, newbase)
+
+doc = minidom.parse(sys.stdin)  # parse the XML document supplied on stdin
+rebase(doc.documentElement, base)
+print doc.toxml('utf-8')  # Python 2 print statement: serialized result goes to stdout
diff --git a/tests/test_apply.py b/tests/test_apply.py
index dce69c1..e151fba 100644
--- a/tests/test_apply.py
+++ b/tests/test_apply.py
@@ -63,6 +63,14 @@ class ApplyTest(unittest.TestCase):
self.assertTrue(html.find(
'Venus
')>=0)
+ def test_apply_filter(self):
+ config.load(configfile % 'filter')
+ splice.apply(self.feeddata)
+
+ # verify that the generated index.html has its stylesheet href rebased onto http://example.com/
+ html = open(os.path.join(workdir, 'index.html')).read()
+ self.assertTrue(html.find(' href="http://example.com/default.css"')>=0)
+
try:
import libxml2
except ImportError: