diff --git a/INSTALL b/INSTALL index 0606283..2d51853 100644 --- a/INSTALL +++ b/INSTALL @@ -19,10 +19,10 @@ distribution. python runtests.py - This should take anywhere from a half a second to ten seconds to execute. - No network connection is required, and it cleans up after itself. If it - completes with an "OK", you are good to go. Otherwise stopping here and - inquiring on the mailing list is a good idea as it can save you lots of + This should take anywhere from one to ten seconds to execute. No network + connection is required, and the script cleans up after itself. If the + script completes with an "OK", you are good to go. Otherwise stopping here + and inquiring on the mailing list is a good idea as it can save you lots of frustration down the road. iii. diff --git a/filters/excerpt.py b/filters/excerpt.py new file mode 100644 index 0000000..5b70438 --- /dev/null +++ b/filters/excerpt.py @@ -0,0 +1,103 @@ +""" +Generate an excerpt from either the summary or the content of an entry. + +Parameters: + width: maximum number of characters in the excerpt. Default: 500 + omit: whitespace delimited list of html tags to remove. Default: none + +Notes: + * if 'img' is in the list of tags to be omitted, <img> tags are replaced with + hypertext links associated with the value of the 'alt' attribute. If there + is no alt attribute value, an empty string is used instead. If the parent element + of the img tag is already an <a> tag, no additional hypertext links are + added.
+"""
+
+import sys, xml.dom.minidom, textwrap
+from xml.dom import Node, minidom
+
+atomNS = 'http://www.w3.org/2005/Atom'
+planetNS = 'http://planet.intertwingly.net'
+
+args = dict(zip([name.lstrip('-') for name in sys.argv[1::2]], sys.argv[2::2]))
+
+wrapper = textwrap.TextWrapper(width=int(args.get('width','500')))
+omit = args.get('omit', '').split()
+
+class copy:
+    """ recursively copy a source to a target, up to a given width """
+
+    def __init__(self, dom, source, target):
+        self.dom = dom          # document used to create the copied nodes
+        self.full = False       # set once the excerpt has been truncated
+        self.text = []          # stripped text fragments copied so far
+        self.textlen = 0        # length of the wrapped line copied so far
+        self.copyChildren(source, target)
+
+    def copyChildren(self, source, target):
+        """ copy child nodes of a source to the target, until full """
+        for child in source.childNodes:
+            if child.nodeType == Node.ELEMENT_NODE:
+                self.copyElement(child, target)
+            elif child.nodeType == Node.TEXT_NODE:
+                self.copyText(child.data, target)
+            if self.full: break
+
+    def copyElement(self, source, target):
+        """ copy source element to the target, unwrapping omitted tags """
+
+        # omitted elements contribute only their children (or elided image)
+        if source.nodeName in omit:
+            if source.nodeName == 'img':
+                return self.elideImage(source, target)
+            return self.copyChildren(source, target)
+
+        # copy element, attributes, and children
+        child = self.dom.createElementNS(source.namespaceURI, source.nodeName)
+        target.appendChild(child)
+        for i in range(0, source.attributes.length):
+            attr = source.attributes.item(i)
+            child.setAttributeNS(attr.namespaceURI, attr.name, attr.value)
+        self.copyChildren(source, child)
+
+    def elideImage(self, source, target):
+        """ copy an elided form of the image element to the target """
+        alt = source.getAttribute('alt') or ''
+        src = source.getAttribute('src')
+
+        if target.nodeName == 'a' or not src:
+            self.copyText(alt, target)
+        else:
+            child = self.dom.createElement('a')
+            child.setAttribute('href', src)
+            self.copyText(alt, child)
+            target.appendChild(child)
+
+    def copyText(self, source, target):
+        """ copy text to the target, until the point where it would wrap """
+        if source.strip():
+            self.text.append(source.strip())
+        lines = wrapper.wrap(' '.join(self.text))
+        if len(lines) <= 1:  # wrap() returns [] while only whitespace was seen
+            target.appendChild(self.dom.createTextNode(source))
+            self.textlen = len(''.join(lines))
+        else:
+            excerpt = source[:len(lines[0])-self.textlen] + u' \u2026'
+            target.appendChild(self.dom.createTextNode(excerpt))
+            self.full = True
+
+# select summary or content element
+dom = minidom.parse(sys.stdin)
+source = dom.getElementsByTagNameNS(atomNS, 'summary')
+if not source:
+    source = dom.getElementsByTagNameNS(atomNS, 'content')
+
+# if present, recursively copy it to a planet:excerpt element
+if source:
+    dom.documentElement.setAttribute('xmlns:planet', planetNS)
+    target = dom.createElementNS(planetNS, 'planet:excerpt')
+    source[0].parentNode.appendChild(target)
+    copy(dom, source[0], target)
+
+# print out results
+print dom.toxml('utf-8')
diff --git a/planet/shell/__init__.py b/planet/shell/__init__.py index 4d83661..72cad5b 100644 --- a/planet/shell/__init__.py +++ b/planet/shell/__init__.py @@ -29,18 +29,18 @@ def run(template_file, doc, mode='template'): try: module = __import__(module_name) except Exception, inst: - print module_name return log.error("Skipping %s '%s' after failing to load '%s': %s", mode, template_resolved, module_name, inst) # Execute the shell module + options = planet.config.template_options(template_file) if mode == 'filter': log.debug("Processing filer %s using %s", template_resolved, module_name) - return module.run(template_resolved, doc, None) + return module.run(template_resolved, doc, None, options) else: log.info("Processing template %s using %s", template_resolved, module_name) output_dir = planet.config.output_dir() output_file = os.path.join(output_dir, base) - module.run(template_resolved, doc, output_file) + module.run(template_resolved, doc, output_file, options) diff --git a/planet/shell/py.py b/planet/shell/py.py index cb233fb..f4476fa 100644 --- a/planet/shell/py.py +++ b/planet/shell/py.py @@
-1,6 +1,6 @@ from subprocess import Popen, PIPE -def run(script, doc, output_file=None): +def run(script, doc, output_file=None, options={}): """ process an Python script """ if output_file: @@ -8,9 +8,14 @@ def run(script, doc, output_file=None): else: out = PIPE - proc = Popen(['python', script], stdin=PIPE, stdout=out, stderr=PIPE) + options = sum([['--'+key, value] for key,value in options.items()], []) + + proc = Popen(['python', script] + options, + stdin=PIPE, stdout=out, stderr=PIPE) + stdout, stderr = proc.communicate(doc) if stderr: - print stderr + import planet + planet.logger.error(stderr) return stdout diff --git a/planet/shell/tmpl.py b/planet/shell/tmpl.py index b566f1c..f5c038e 100644 --- a/planet/shell/tmpl.py +++ b/planet/shell/tmpl.py @@ -221,7 +221,7 @@ def template_info(source): return output -def run(script, doc, output_file=None): +def run(script, doc, output_file=None, options={}): """ process an HTMLTMPL file """ manager = htmltmpl.TemplateManager() template = manager.prepare(script) diff --git a/planet/shell/xslt.py b/planet/shell/xslt.py index 1e6de03..593f7b9 100644 --- a/planet/shell/xslt.py +++ b/planet/shell/xslt.py @@ -1,6 +1,6 @@ import os -def run(script, doc, output_file=None): +def run(script, doc, output_file=None, options={}): """ process an XSLT stylesheet """ try: diff --git a/tests/data/filter/excerpt-images.ini b/tests/data/filter/excerpt-images.ini new file mode 100644 index 0000000..e95af75 --- /dev/null +++ b/tests/data/filter/excerpt-images.ini @@ -0,0 +1,5 @@ +[Planet] +filters = excerpt.py + +[excerpt.py] +omit = img diff --git a/tests/data/filter/excerpt-images.xml b/tests/data/filter/excerpt-images.xml new file mode 100644 index 0000000..aab7cd9 --- /dev/null +++ b/tests/data/filter/excerpt-images.xml @@ -0,0 +1,10 @@ + +
before + +bar +bar + + +after
+ + diff --git a/tests/data/filter/excerpt-lorem-ipsum.ini b/tests/data/filter/excerpt-lorem-ipsum.ini new file mode 100644 index 0000000..85bbac8 --- /dev/null +++ b/tests/data/filter/excerpt-lorem-ipsum.ini @@ -0,0 +1,6 @@ +[Planet] +filters = excerpt.py + +[excerpt.py] +width = 100 +omit = p diff --git a/tests/data/filter/excerpt-lorem-ipsum.xml b/tests/data/filter/excerpt-lorem-ipsum.xml new file mode 100644 index 0000000..0561810 --- /dev/null +++ b/tests/data/filter/excerpt-lorem-ipsum.xml @@ -0,0 +1,8 @@ + +

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nullam velit. Vivamus tincidunt, erat in rutrum fringilla, urna urna nonummy turpis, et lobortis eros dolor eu dui. Pellentesque vitae lorem. Sed lobortis arcu accumsan sapien. Pellentesque eget nulla et justo mollis mattis. Nulla dictum est eleifend nisl. Pellentesque ultricies ligula vel arcu. Ut ac mi in felis porta tristique. Donec cursus mollis ipsum. Maecenas nonummy.

+ +

Sed posuere. Phasellus pellentesque mattis mauris. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos hymenaeos. Ut ullamcorper consequat eros. Morbi hendrerit faucibus felis. Pellentesque odio eros, bibendum eget, ultrices ac, tempus quis, diam. Donec posuere, ligula eget sodales tristique, enim nunc faucibus nibh, luctus sagittis elit orci a nulla. Nulla scelerisque. In hac habitasse platea dictumst. Etiam vel nisl quis mauris metus.

+ +

Vivamus nonummy, justo at malesuada mollis, nisi purus fermentum neque, a faucibus dolor lorem at sem. Nunc quam nulla, lobortis sed, vehicula at, elementum volutpat.

+
+
diff --git a/tests/test_filters.py b/tests/test_filters.py
index 5dc2938..7f7a433 100644
--- a/tests/test_filters.py
+++ b/tests/test_filters.py
@@ -1,15 +1,52 @@
 #!/usr/bin/env python
 
 import unittest, xml.dom.minidom
-from planet import shell
-
-testfile = 'tests/data/filter/coral_cdn.xml'
-filter = 'coral_cdn_filter.py'
+from planet import shell, config
 
 class FilterTests(unittest.TestCase):
+    """ run sample entries through filters and check the resulting XML """
 
+    def read(self, testfile):
+        """ return the contents of testfile, closing the file afterwards """
+        stream = open(testfile)
+        try:
+            return stream.read()
+        finally:
+            stream.close()
+
+    def excerpt_for(self, testfile, configfile):
+        """ run the filters listed in configfile; return planet:excerpt """
+        config.load(configfile)
+
+        output = self.read(testfile)
+        for name in config.filters():
+            output = shell.run(name, output, mode="filter")
+
+        dom = xml.dom.minidom.parseString(output)
+        return dom.getElementsByTagName('planet:excerpt')[0]
+
     def test_coral_cdn(self):
-        output = shell.run(filter, open(testfile).read(), mode="filter")
+        testfile = 'tests/data/filter/coral_cdn.xml'
+        name = 'coral_cdn_filter.py'
+
+        output = shell.run(name, self.read(testfile), mode="filter")
         dom = xml.dom.minidom.parseString(output)
         imgsrc = dom.getElementsByTagName('img')[0].getAttribute('src')
         self.assertEqual('http://example.com.nyud.net:8080/foo.png', imgsrc)
+
+    def test_excerpt_images(self):
+        excerpt = self.excerpt_for('tests/data/filter/excerpt-images.xml',
+            'tests/data/filter/excerpt-images.ini')
+        anchors = excerpt.getElementsByTagName('a')
+        hrefs = [a.getAttribute('href') for a in anchors]
+        texts = [a.lastChild.nodeValue for a in anchors]
+
+        self.assertEqual(['inner','outer1','outer2'], hrefs)
+        self.assertEqual(['bar','bar',''], texts)
+
+    def test_excerpt_lorem_ipsum(self):
+        excerpt = self.excerpt_for('tests/data/filter/excerpt-lorem-ipsum.xml',
+            'tests/data/filter/excerpt-lorem-ipsum.ini')
+        self.assertEqual(u'Lorem ipsum dolor sit amet, consectetuer ' +
+            u'adipiscing elit. Nullam velit. Vivamus tincidunt, erat ' +
+            u'in \u2026', excerpt.firstChild.firstChild.nodeValue)