From f1c373051120c3d8f27a5dda7eb8b3b437e0892f Mon Sep 17 00:00:00 2001 From: Sam Ruby Date: Fri, 15 Sep 2006 07:20:54 -0400 Subject: [PATCH] xpath sifter and category support --- filters/xpath_sifter.py | 23 +++++++++++++++++++++ planet/reconstitute.py | 16 ++++++++++++++ tests/data/filter/category-one.xml | 3 +++ tests/data/filter/category-two.xml | 3 +++ tests/data/filter/coral_cdn.xml | 2 +- tests/data/filter/xpath-sifter.ini | 6 ++++++ tests/data/reconstitute/category_label.xml | 11 ++++++++++ tests/data/reconstitute/category_scheme.xml | 11 ++++++++++ tests/data/reconstitute/category_term.xml | 11 ++++++++++ tests/data/reconstitute/source_category.xml | 14 +++++++++++++ tests/test_filters.py | 18 ++++++++++++++++ 11 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 filters/xpath_sifter.py create mode 100644 tests/data/filter/category-one.xml create mode 100644 tests/data/filter/category-two.xml create mode 100644 tests/data/filter/xpath-sifter.ini create mode 100644 tests/data/reconstitute/category_label.xml create mode 100644 tests/data/reconstitute/category_scheme.xml create mode 100644 tests/data/reconstitute/category_term.xml create mode 100644 tests/data/reconstitute/source_category.xml diff --git a/filters/xpath_sifter.py b/filters/xpath_sifter.py new file mode 100644 index 0000000..c7c14c4 --- /dev/null +++ b/filters/xpath_sifter.py @@ -0,0 +1,23 @@ +import sys, libxml2 + +# parse options +options = dict(zip(sys.argv[1::2],sys.argv[2::2])) + +# parse entry +doc = libxml2.parseDoc(sys.stdin.read()) +ctxt = doc.xpathNewContext() +ctxt.xpathRegisterNs('atom','http://www.w3.org/2005/Atom') +ctxt.xpathRegisterNs('xhtml','http://www.w3.org/1999/xhtml') + +# process requirements +if options.has_key('--require'): + for xpath in options['--require'].split('\n'): + if xpath and not ctxt.xpathEval(xpath): sys.exit(1) + +# process exclusions +if options.has_key('--exclude'): + for xpath in options['--exclude'].split('\n'): + if xpath and ctxt.xpathEval(xpath): sys.exit(1) + +# if we get this far, the feed is to be included +print doc diff --git a/planet/reconstitute.py b/planet/reconstitute.py index 92ba3da..48a0c85 100644 --- a/planet/reconstitute.py +++ b/planet/reconstitute.py @@ -110,6 +110,16 @@ def date(xentry, name, parsed): formatted = time.strftime(config.date_format(), parsed) xdate.setAttribute('planet:format', formatted) +def category(xentry, tag): + xtag = xentry.ownerDocument.createElement('category') + if tag.has_key('term') and tag.term: + xtag.setAttribute('term', tag.get('term')) + if tag.has_key('scheme') and tag.scheme: + xtag.setAttribute('scheme', tag.get('scheme')) + if tag.has_key('label') and tag.label: + xtag.setAttribute('label', tag.get('label')) + xentry.appendChild(xtag) + def author(xentry, name, detail): """ insert an author-like element into the entry """ if not detail: return @@ -160,6 +170,9 @@ def source(xsource, source, bozo): createTextElement(xsource, 'icon', source.get('icon', None)) createTextElement(xsource, 'logo', source.get('logo', None)) + for tag in source.get('tags',[]): + category(xsource, tag) + author_detail = source.get('author_detail',{}) if not author_detail.has_key('name') and source.has_key('planet_name'): author_detail['name'] = source['planet_name'] @@ -201,6 +214,9 @@ def reconstitute(feed, entry): date(xentry, 'updated', entry.get('updated_parsed',time.gmtime())) date(xentry, 'published', entry.get('published_parsed',None)) + for tag in entry.get('tags',[]): + category(xentry, tag) + author(xentry, 'author', entry.get('author_detail',None)) for contributor in entry.get('contributors',[]): author(xentry, 'contributor', contributor) diff --git a/tests/data/filter/category-one.xml b/tests/data/filter/category-one.xml new file mode 100644 index 0000000..922589e --- /dev/null +++ b/tests/data/filter/category-one.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/data/filter/category-two.xml b/tests/data/filter/category-two.xml new file mode 100644 index 0000000..7e6d990 --- /dev/null +++ b/tests/data/filter/category-two.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/data/filter/coral_cdn.xml b/tests/data/filter/coral_cdn.xml index 3c45248..072353c 100644 --- a/tests/data/filter/coral_cdn.xml +++ b/tests/data/filter/coral_cdn.xml @@ -1,4 +1,4 @@ - +
diff --git a/tests/data/filter/xpath-sifter.ini b/tests/data/filter/xpath-sifter.ini new file mode 100644 index 0000000..90a08a6 --- /dev/null +++ b/tests/data/filter/xpath-sifter.ini @@ -0,0 +1,6 @@ +[Planet] +filters = xpath_sifter.py + +[xpath_sifter.py] +require: + //atom:category[@term='two'] diff --git a/tests/data/reconstitute/category_label.xml b/tests/data/reconstitute/category_label.xml new file mode 100644 index 0000000..fc38fc7 --- /dev/null +++ b/tests/data/reconstitute/category_label.xml @@ -0,0 +1,11 @@ + + + + + + + + diff --git a/tests/data/reconstitute/category_scheme.xml b/tests/data/reconstitute/category_scheme.xml new file mode 100644 index 0000000..b1cbbb4 --- /dev/null +++ b/tests/data/reconstitute/category_scheme.xml @@ -0,0 +1,11 @@ + + + + + + + + diff --git a/tests/data/reconstitute/category_term.xml b/tests/data/reconstitute/category_term.xml new file mode 100644 index 0000000..344fed6 --- /dev/null +++ b/tests/data/reconstitute/category_term.xml @@ -0,0 +1,11 @@ + + + + + + + + diff --git a/tests/data/reconstitute/source_category.xml b/tests/data/reconstitute/source_category.xml new file mode 100644 index 0000000..722e347 --- /dev/null +++ b/tests/data/reconstitute/source_category.xml @@ -0,0 +1,14 @@ + + + + + + + + + + diff --git a/tests/test_filters.py b/tests/test_filters.py index 14aafe9..aeee9a4 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -58,6 +58,24 @@ class FilterTests(unittest.TestCase): self.assertEqual(u'before--after', excerpt.firstChild.firstChild.nodeValue) + def test_xpath_filter(self): + config.load('tests/data/filter/xpath-sifter.ini') + testfile = 'tests/data/filter/category-one.xml' + + output = open(testfile).read() + for filter in config.filters(): + output = shell.run(filter, output, mode="filter") + + self.assertEqual('', output) + + testfile = 'tests/data/filter/category-two.xml' + + output = open(testfile).read() + for filter in config.filters(): + output = shell.run(filter, output, mode="filter") + + self.assertNotEqual('', output) + try: from subprocess import Popen, PIPE sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)