Don't emit separate open and close tags for empty void elements when using htmltmpl
This commit is contained in:
parent
82753d09a1
commit
9aba1dbfc7
@ -1,7 +1,10 @@
|
|||||||
from xml.sax.saxutils import escape
|
from xml.sax.saxutils import escape
|
||||||
import sgmllib, time, os, sys, new, urlparse
|
import sgmllib, time, os, sys, new, urlparse, re
|
||||||
from planet import config, feedparser, htmltmpl
|
from planet import config, feedparser, htmltmpl
|
||||||
|
|
||||||
|
# Tag names that feedparser's HTML processor treats as having no end tag
# (presumably the HTML void elements such as br/img/hr -- confirm against
# feedparser._BaseHTMLProcessor.elements_no_end_tag).
voids=feedparser._BaseHTMLProcessor.elements_no_end_tag
|
||||||
|
# Matches an opening tag whose name is one of `voids`, immediately followed
# by its own closing tag with nothing in between, e.g. <img src="x.jpg"></img>.
# Group 1 is the tag name plus its attributes, group 2 is the bare tag name
# (back-referenced as \2 so the closing tag must be the same element).
empty=re.compile(r"<((%s)[^>]*)></\2>" % '|'.join(voids))
|
||||||
|
|
||||||
class stripHtml(sgmllib.SGMLParser):
|
class stripHtml(sgmllib.SGMLParser):
|
||||||
"remove all tags from the data"
|
"remove all tags from the data"
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
@ -130,9 +133,12 @@ def tmpl_mapper(source, rules):
|
|||||||
node = source
|
node = source
|
||||||
for path in rule[2:]:
|
for path in rule[2:]:
|
||||||
if isinstance(path, str) and path in node:
|
if isinstance(path, str) and path in node:
|
||||||
if path == 'value' and node.get('type','')=='text/plain':
|
if path == 'value':
|
||||||
|
if node.get('type','')=='text/plain':
|
||||||
node['value'] = escape(node['value'])
|
node['value'] = escape(node['value'])
|
||||||
node['type'] = 'text/html'
|
node['type'] = 'text/html'
|
||||||
|
elif node.get('type','')=='application/xhtml+xml':
|
||||||
|
node['value'] = empty.sub(r"<\1 />", node['value'])
|
||||||
node = node[path]
|
node = node[path]
|
||||||
elif isinstance(path, int):
|
elif isinstance(path, int):
|
||||||
node = node[path]
|
node = node[path]
|
||||||
|
13
tests/data/filter/tmpl/content_xhtml2.xml
Normal file
13
tests/data/filter/tmpl/content_xhtml2.xml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
<!--
|
||||||
|
Description: xhtml content
|
||||||
|
Expect: Items[0]['content'] == '<img src="x.jpg" />'
|
||||||
|
-->
|
||||||
|
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<entry>
|
||||||
|
<content type="xhtml">
|
||||||
|
<div xmlns="http://www.w3.org/1999/xhtml"><img src="x.jpg"/></div>
|
||||||
|
</content>
|
||||||
|
</entry>
|
||||||
|
</feed>
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user