From 3e1c9bcb3eceec96005e6ed939fd16a769314ab8 Mon Sep 17 00:00:00 2001
From: Sam Ruby <rubys@intertwingly.net>
Date: Sat, 14 Apr 2007 10:04:22 -0400
Subject: [PATCH] Output filters

---
 docs/config.html                   |  4 ++++
 docs/filters.html                  | 18 +++++++++++-------
 planet/shell/__init__.py           |  3 ++-
 planet/shell/tmpl.py               | 13 ++++++++-----
 planet/splice.py                   | 18 +++++++++++++++++-
 tests/data/apply/config-filter.ini | 21 +++++++++++++++++++++
 tests/data/apply/rebase.py         | 24 ++++++++++++++++++++++++
 tests/test_apply.py                |  8 ++++++++
 8 files changed, 95 insertions(+), 14 deletions(-)
 create mode 100644 tests/data/apply/config-filter.ini
 create mode 100644 tests/data/apply/rebase.py
diff --git a/docs/config.html b/docs/config.html
index abb3f19..4a08ed7 100644
--- a/docs/config.html
+++ b/docs/config.html
@@ -68,6 +68,9 @@ can be found</dd>
 <dt><ins>filters</ins></dt>
 <dd>Space-separated list of <a href="filters.html">filters</a> to apply to
 each entry</dd>
+<dt><ins>filter_directories</ins></dt>
+<dd>Space-separated list of directories in which <code>filters</code>
+can be found</dd>
 
 </dl>
 <dl class="compact code">
@@ -148,6 +151,7 @@ processed as <a href="templates.html">templates</a>.  With Planet 2.0,
 it is possible to override parameters like <code>items_per_page</code>
 on a per template basis, but at the current time Planet Venus doesn't
 implement this.</p>
+<p><ins><a href="filters.html">Filters</a> can be defined on a per-template basis, and will be used to post-process the output of the template.</ins></p>
 
 <h3 id="filter"><code>[</code><em>filter</em><code>]</code></h3>
 <p>Sections which are listed in <code>[planet] filters</code> are
diff --git a/docs/filters.html b/docs/filters.html
index 865aa41..425d189 100644
--- a/docs/filters.html
+++ b/docs/filters.html
@@ -15,11 +15,13 @@ Anything written to <code>stderr</code> is logged as an ERROR message.  If no
 <code>stdout</code> is produced, the entry is not written to the cache or
 processed further; in fact, if the entry had previously been written to the cache, it will be removed.</p>
 
-<p>Input to a filter is a aggressively
+<p>There are two types of filters supported by Venus: input and template.</p>
+<p>Input to an input filter is an aggressively
 <a href="normalization.html">normalized</a> entry.  For
 example, if a feed is RSS 1.0 with 10 items, the filter will be called ten
 times, each with a single Atom 1.0 entry, with all textConstructs
 expressed as XHTML, and everything encoded as UTF-8.</p>
+<p>Input to a template filter will be the output produced by the template.</p>
 
 <p>You will find a small set of example filters in the <a
 href="../filters">filters</a> directory.  The <a
@@ -54,8 +56,14 @@ instead of XPath expressions.</p>
 <h3>Notes</h3>
 
 <ul>
-<li>Filters are executed when a feed is fetched, and the results are placed
-into the cache.  Changing a configuration file alone is not sufficient to
+<li>Any filters listed in the <code>[planet]</code> section of your config.ini
+will be invoked on all feeds.  Filters listed in individual
+<code>[feed]</code> sections will only be invoked on those feeds.
+Filters listed in <code>[template]</code> sections will be invoked on the
+output of that template.</li>
+
+<li>Input filters are executed when a feed is fetched, and the results are
+placed into the cache.  Changing a configuration file alone is not sufficient to
 change the contents of the cache &mdash; typically that only occurs after
 a feed is modified.</li>
 
@@ -63,10 +71,6 @@ a feed is modified.</li>
 configuration file (think unix pipes). Planet wide filters are executed before
 feed specific filters.</li>
 
-<li>Any filters listed in the <code>[planet]</code> section of your config.ini
-will be invoked on all feeds.  Filters listed in individual
-<code>[feed]</code> sections will only be invoked on those feeds.</li>
-
 <li>The file extension of the filter is significant.  <code>.py</code> invokes
 python. <code>.xslt</code> involkes XSLT.  <code>.sed</code> and
 <code>.tmpl</code> (a.k.a. htmltmp) are also options. Other languages, like
diff --git a/planet/shell/__init__.py b/planet/shell/__init__.py
index 18c764a..7052454 100644
--- a/planet/shell/__init__.py
+++ b/planet/shell/__init__.py
@@ -30,7 +30,7 @@ def run(template_file, doc, mode='template'):
         if not mode in logged_modes:
             log.info("%s search path:", mode)
             for template_dir in dirs:
-                log.info("    %s", os.path.realpath(template_dir))
+                log.error("    %s", os.path.realpath(template_dir))
             logged_modes.append(mode)
         return
     template_resolved = os.path.realpath(template_resolved)
@@ -60,3 +60,4 @@ def run(template_file, doc, mode='template'):
         output_dir = planet.config.output_dir()
         output_file = os.path.join(output_dir, base)
         module.run(template_resolved, doc, output_file, options)
+        return output_file
diff --git a/planet/shell/tmpl.py b/planet/shell/tmpl.py
index e6d3745..4f4d822 100644
--- a/planet/shell/tmpl.py
+++ b/planet/shell/tmpl.py
@@ -241,12 +241,15 @@ def run(script, doc, output_file=None, options={}):
     for key,value in template_info(doc).items():
         tp.set(key, value)
 
-    reluri = os.path.splitext(os.path.basename(output_file))[0]
-    tp.set('url', urlparse.urljoin(config.link(),reluri))
+    if output_file:
+        reluri = os.path.splitext(os.path.basename(output_file))[0]
+        tp.set('url', urlparse.urljoin(config.link(),reluri))
 
-    output = open(output_file, "w")
-    output.write(tp.process(template))
-    output.close()
+        output = open(output_file, "w")
+        output.write(tp.process(template))
+        output.close()
+    else:
+        return tp.process(template)
 
 if __name__ == '__main__':
     sys.path.insert(0, os.path.split(sys.path[0])[0])
diff --git a/planet/splice.py b/planet/splice.py
index b5e9d45..26aa97d 100644
--- a/planet/splice.py
+++ b/planet/splice.py
@@ -111,9 +111,25 @@ def apply(doc):
     if not os.path.exists(output_dir): os.makedirs(output_dir)
     log = planet.getLogger(config.log_level(),config.log_format())
 
+    planet_filters = config.filters('Planet')
+
     # Go-go-gadget-template
     for template_file in config.template_files():
-        shell.run(template_file, doc)
+        output_file = shell.run(template_file, doc)
+
+        # run template specific filters (output_file is None if lookup failed)
+        if output_file and config.filters(template_file) != planet_filters:
+            output = open(output_file).read()
+            for filter in config.filters(template_file):
+                if filter in planet_filters: continue
+                output = shell.run(filter, output, mode="filter")
+                if not output:  # filter produced nothing: drop the output file
+                    os.unlink(output_file)
+                    break
+            else:  # no break: every filter produced output, so write it back
+                handle = open(output_file,'w')
+                handle.write(output)
+                handle.close()
 
     # Process bill of materials
     for copy_file in config.bill_of_materials():
diff --git a/tests/data/apply/config-filter.ini b/tests/data/apply/config-filter.ini
new file mode 100644
index 0000000..6bea6db
--- /dev/null
+++ b/tests/data/apply/config-filter.ini
@@ -0,0 +1,21 @@
+[Planet]
+output_theme = asf
+output_dir = tests/work/apply
+name = test planet
+cache_directory = tests/work/spider/cache
+filter_directories = tests/data/apply
+
+[index.html.xslt]
+filters = rebase.py?base=http://example.com/
+
+[tests/data/spider/testfeed0.atom]
+name = not found
+
+[tests/data/spider/testfeed1b.atom]
+name = one
+
+[tests/data/spider/testfeed2.atom]
+name = two
+
+[tests/data/spider/testfeed3.rss]
+name = three
diff --git a/tests/data/apply/rebase.py b/tests/data/apply/rebase.py
new file mode 100644
index 0000000..9cd77d1
--- /dev/null
+++ b/tests/data/apply/rebase.py
@@ -0,0 +1,24 @@
+# Filter: make href attributes absolute, using the --base argument passed in
+
+import sys
+try:
+  base = sys.argv[sys.argv.index('--base')+1]
+except (ValueError, IndexError):  # --base absent, or given without a value
+  sys.stderr.write('Missing required argument: base\n')
+  sys.exit()  # exit before anything is written to stdout
+
+from xml.dom import minidom, Node
+from urlparse import urljoin
+
+def rebase(node, newbase):
+  """Resolve href on node and on every descendant element against newbase."""
+  if node.hasAttribute('href'):
+    # re-setting an already absolute href to the same value changes nothing
+    node.setAttribute('href', urljoin(newbase, node.getAttribute('href')))
+  for child in node.childNodes:
+    if child.nodeType == Node.ELEMENT_NODE:
+      rebase(child, newbase)
+
+doc = minidom.parse(sys.stdin)
+rebase(doc.documentElement, base)
+print doc.toxml('utf-8')
diff --git a/tests/test_apply.py b/tests/test_apply.py
index dce69c1..e151fba 100644
--- a/tests/test_apply.py
+++ b/tests/test_apply.py
@@ -63,6 +63,14 @@ class ApplyTest(unittest.TestCase):
         self.assertTrue(html.find(
           '<h4><a href="http://example.com/2">Venus</a></h4>')>=0)
 
+    def test_apply_filter(self):
+        config.load(configfile % 'filter')
+        splice.apply(self.feeddata)
+
+        # verify that the rebase.py filter made the default.css href absolute
+        html = open(os.path.join(workdir, 'index.html')).read()
+        self.assertTrue(html.find(' href="http://example.com/default.css"')>=0)
+
 try:
     import libxml2
 except ImportError: