Output filters
This commit is contained in:
parent
1d20e8e74e
commit
3e1c9bcb3e
@ -68,6 +68,9 @@ can be found</dd>
|
||||
<dt><ins>filters</ins></dt>
|
||||
<dd>Space-separated list of <a href="filters.html">filters</a> to apply to
|
||||
each entry</dd>
|
||||
<dt><ins>filter_directories</ins></dt>
|
||||
<dd>Space-separated list of directories in which <code>filters</code>
|
||||
can be found</dd>
|
||||
|
||||
</dl>
|
||||
<dl class="compact code">
|
||||
@ -148,6 +151,7 @@ processed as <a href="templates.html">templates</a>. With Planet 2.0,
|
||||
it is possible to override parameters like <code>items_per_page</code>
|
||||
on a per template basis, but at the current time Planet Venus doesn't
|
||||
implement this.</p>
|
||||
<p><ins><a href="filters.html">Filters</a> can be defined on a per-template basis, and will be used to post-process the output of the template.</ins></p>
|
||||
|
||||
<h3 id="filter"><code>[</code><em>filter</em><code>]</code></h3>
|
||||
<p>Sections which are listed in <code>[planet] filters</code> are
|
||||
|
@ -15,11 +15,13 @@ Anything written to <code>stderr</code> is logged as an ERROR message. If no
|
||||
<code>stdout</code> is produced, the entry is not written to the cache or
|
||||
processed further; in fact, if the entry had previously been written to the cache, it will be removed.</p>
|
||||
|
||||
<p>Input to a filter is a aggressively
|
||||
<p>There are two types of filters supported by Venus, input and template.</p>
|
||||
<p>Input to an input filter is an aggressively
|
||||
<a href="normalization.html">normalized</a> entry. For
|
||||
example, if a feed is RSS 1.0 with 10 items, the filter will be called ten
|
||||
times, each with a single Atom 1.0 entry, with all textConstructs
|
||||
expressed as XHTML, and everything encoded as UTF-8.</p>
|
||||
<p>Input to a template filter will be the output produced by the template.</p>
|
||||
|
||||
<p>You will find a small set of example filters in the <a
|
||||
href="../filters">filters</a> directory. The <a
|
||||
@ -54,8 +56,14 @@ instead of XPath expressions.</p>
|
||||
<h3>Notes</h3>
|
||||
|
||||
<ul>
|
||||
<li>Filters are executed when a feed is fetched, and the results are placed
|
||||
into the cache. Changing a configuration file alone is not sufficient to
|
||||
<li>Any filters listed in the <code>[planet]</code> section of your config.ini
|
||||
will be invoked on all feeds. Filters listed in individual
|
||||
<code>[feed]</code> sections will only be invoked on those feeds.
|
||||
Filters listed in <code>[template]</code> sections will be invoked on the
|
||||
output of that template.</li>
|
||||
|
||||
<li>Input filters are executed when a feed is fetched, and the results are
|
||||
placed into the cache. Changing a configuration file alone is not sufficient to
|
||||
change the contents of the cache — typically that only occurs after
|
||||
a feed is modified.</li>
|
||||
|
||||
@ -63,10 +71,6 @@ a feed is modified.</li>
|
||||
configuration file (think unix pipes). Planet wide filters are executed before
|
||||
feed specific filters.</li>
|
||||
|
||||
<li>Any filters listed in the <code>[planet]</code> section of your config.ini
|
||||
will be invoked on all feeds. Filters listed in individual
|
||||
<code>[feed]</code> sections will only be invoked on those feeds.</li>
|
||||
|
||||
<li>The file extension of the filter is significant. <code>.py</code> invokes
|
||||
python. <code>.xslt</code> invokes XSLT. <code>.sed</code> and
|
||||
<code>.tmpl</code> (a.k.a. htmltmpl) are also options. Other languages, like
|
||||
|
@ -30,7 +30,7 @@ def run(template_file, doc, mode='template'):
|
||||
if not mode in logged_modes:
|
||||
log.info("%s search path:", mode)
|
||||
for template_dir in dirs:
|
||||
log.info(" %s", os.path.realpath(template_dir))
|
||||
log.error(" %s", os.path.realpath(template_dir))
|
||||
logged_modes.append(mode)
|
||||
return
|
||||
template_resolved = os.path.realpath(template_resolved)
|
||||
@ -60,3 +60,4 @@ def run(template_file, doc, mode='template'):
|
||||
output_dir = planet.config.output_dir()
|
||||
output_file = os.path.join(output_dir, base)
|
||||
module.run(template_resolved, doc, output_file, options)
|
||||
return output_file
|
||||
|
@ -241,12 +241,15 @@ def run(script, doc, output_file=None, options={}):
|
||||
for key,value in template_info(doc).items():
|
||||
tp.set(key, value)
|
||||
|
||||
if output_file:
|
||||
reluri = os.path.splitext(os.path.basename(output_file))[0]
|
||||
tp.set('url', urlparse.urljoin(config.link(),reluri))
|
||||
|
||||
output = open(output_file, "w")
|
||||
output.write(tp.process(template))
|
||||
output.close()
|
||||
else:
|
||||
return tp.process(template)
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.path.insert(0, os.path.split(sys.path[0])[0])
|
||||
|
@ -111,9 +111,25 @@ def apply(doc):
|
||||
if not os.path.exists(output_dir): os.makedirs(output_dir)
|
||||
log = planet.getLogger(config.log_level(),config.log_format())
|
||||
|
||||
planet_filters = config.filters('Planet')
|
||||
|
||||
# Go-go-gadget-template
|
||||
for template_file in config.template_files():
|
||||
shell.run(template_file, doc)
|
||||
output_file = shell.run(template_file, doc)
|
||||
|
||||
# run any template specific filters
|
||||
if config.filters(template_file) != planet_filters:
|
||||
output = open(output_file).read()
|
||||
for filter in config.filters(template_file):
|
||||
if filter in planet_filters: continue
|
||||
output = shell.run(filter, output, mode="filter")
|
||||
if not output:
|
||||
os.unlink(output_file)
|
||||
break
|
||||
else:
|
||||
handle = open(output_file,'w')
|
||||
handle.write(output)
|
||||
handle.close()
|
||||
|
||||
# Process bill of materials
|
||||
for copy_file in config.bill_of_materials():
|
||||
|
21
tests/data/apply/config-filter.ini
Normal file
21
tests/data/apply/config-filter.ini
Normal file
@ -0,0 +1,21 @@
|
||||
[Planet]
|
||||
output_theme = asf
|
||||
output_dir = tests/work/apply
|
||||
name = test planet
|
||||
cache_directory = tests/work/spider/cache
|
||||
filter_directories = tests/data/apply
|
||||
|
||||
[index.html.xslt]
|
||||
filters = rebase.py?base=http://example.com/
|
||||
|
||||
[tests/data/spider/testfeed0.atom]
|
||||
name = not found
|
||||
|
||||
[tests/data/spider/testfeed1b.atom]
|
||||
name = one
|
||||
|
||||
[tests/data/spider/testfeed2.atom]
|
||||
name = two
|
||||
|
||||
[tests/data/spider/testfeed3.rss]
|
||||
name = three
|
24
tests/data/apply/rebase.py
Normal file
24
tests/data/apply/rebase.py
Normal file
@ -0,0 +1,24 @@
|
||||
# make href attributes absolute, using base argument passed in
|
||||
|
||||
import sys
|
||||
try:
|
||||
base = sys.argv[sys.argv.index('--base')+1]
|
||||
except:
|
||||
sys.stderr.write('Missing required argument: base\n')
|
||||
sys.exit()
|
||||
|
||||
from xml.dom import minidom, Node
|
||||
from urlparse import urljoin
|
||||
|
||||
def rebase(node, newbase):
    """Make the href attributes in the subtree rooted at node absolute.

    node    -- DOM element to process; its element children are visited
               recursively.
    newbase -- base URI that each href value is resolved against.

    The document is modified in place; nothing is returned.
    """
    if node.hasAttribute('href'):
        href = node.getAttribute('href')
        # Resolve against the newbase argument rather than the module-level
        # base, so the function honours whatever base the caller passes in.
        absolute = urljoin(newbase, href)
        # Only rewrite the attribute when resolution actually changes it.
        if href != absolute:
            node.setAttribute('href', absolute)
    for child in node.childNodes:
        # Only element nodes are processed; other node types are skipped.
        if child.nodeType == Node.ELEMENT_NODE:
            rebase(child, newbase)
|
||||
|
||||
doc = minidom.parse(sys.stdin)
|
||||
rebase(doc.documentElement, base)
|
||||
print doc.toxml('utf-8')
|
@ -63,6 +63,14 @@ class ApplyTest(unittest.TestCase):
|
||||
self.assertTrue(html.find(
|
||||
'<h4><a href="http://example.com/2">Venus</a></h4>')>=0)
|
||||
|
||||
def test_apply_filter(self):
|
||||
config.load(configfile % 'filter')
|
||||
splice.apply(self.feeddata)
|
||||
|
||||
# verify that the template filter rewrote relative hrefs in index.html
|
||||
html = open(os.path.join(workdir, 'index.html')).read()
|
||||
self.assertTrue(html.find(' href="http://example.com/default.css"')>=0)
|
||||
|
||||
try:
|
||||
import libxml2
|
||||
except ImportError:
|
||||
|
Loading…
x
Reference in New Issue
Block a user