diff --git a/docs/filters.html b/docs/filters.html
index b4a0394..865aa41 100644
--- a/docs/filters.html
+++ b/docs/filters.html
@@ -13,7 +13,7 @@
parameters come from the config file, and output goes to stdout
.
Anything written to stderr
is logged as an ERROR message. If no
stdout
is produced, the entry is not written to the cache or
-processed further.
+processed further; in fact, if the entry had previously been written to the cache, it will be removed.
Input to a filter is a aggressively
normalized entry. For
@@ -54,6 +54,18 @@ instead of XPath expressions.
Notes
+- Filters are executed when a feed is fetched, and the results are placed
+into the cache. Changing a configuration file alone is not sufficient to
+change the contents of the cache — typically that only occurs after
+a feed is modified.
+
+- Filters are simply invoked in the order they are listed in the
+configuration file (think Unix pipes). Planet-wide filters are executed before
+feed-specific filters.
+
+- Any filters listed in the
[planet]
section of your config.ini
+will be invoked on all feeds. Filters listed in individual
+[feed]
sections will only be invoked on those feeds.
- The file extension of the filter is significant.
.py
invokes
python. .xslt
involkes XSLT. .sed
and
@@ -61,14 +73,6 @@ python. .xslt
involkes XSLT. .sed
and
perl or ruby or class/jar (java), aren't supported at the moment, but these
would be easy to add.
-- Any filters listed in the
[planet]
section of your config.ini
-will be invoked on all feeds. Filters listed in individual
-[feed]
sections will only be invoked on those feeds.
-
-- Filters are simply invoked in the order they are listed in the
-configuration file (think unix pipes). Planet wide filters are executed before
-feed specific filters.
-
- Templates written using htmltmpl currently only have access to a fixed set
of fields, whereas XSLT templates have access to everything.
diff --git a/planet/spider.py b/planet/spider.py
index 3ee8515..e88d203 100644
--- a/planet/spider.py
+++ b/planet/spider.py
@@ -194,7 +194,9 @@ def writeCache(feed_uri, feed_info, data):
for filter in config.filters(feed_uri):
output = shell.run(filter, output, mode="filter")
if not output: break
- if not output: continue
+ if not output:
+ if os.path.exists(cache_file): os.remove(cache_file)
+ continue
# write out and timestamp the results
write(output, cache_file)
diff --git a/tests/test_spider.py b/tests/test_spider.py
index ecf5986..1936f8d 100644
--- a/tests/test_spider.py
+++ b/tests/test_spider.py
@@ -73,6 +73,14 @@ class SpiderTest(unittest.TestCase):
self.spiderFeed(testfeed % '1b')
self.verify_spiderFeed()
+ def test_spiderFeed_retroactive_filter(self):
+ config.load(configfile)
+ self.spiderFeed(testfeed % '1b')
+ self.assertEqual(5, len(glob.glob(workdir+"/*")))
+ config.parser.set('Planet', 'filter', 'two')
+ self.spiderFeed(testfeed % '1b')
+ self.assertEqual(1, len(glob.glob(workdir+"/*")))
+
def test_spiderUpdate(self):
config.load(configfile)
self.spiderFeed(testfeed % '1a')