Properly handle content type text/plain
This commit is contained in:
parent
3024af031f
commit
f2ac92465d
@ -16,7 +16,6 @@ Todo:
|
|||||||
import re, time, md5, sgmllib
|
import re, time, md5, sgmllib
|
||||||
from xml.sax.saxutils import escape
|
from xml.sax.saxutils import escape
|
||||||
from xml.dom import minidom, Node
|
from xml.dom import minidom, Node
|
||||||
from BeautifulSoup import BeautifulSoup
|
|
||||||
from planet.html5lib import liberalxmlparser, treebuilders
|
from planet.html5lib import liberalxmlparser, treebuilders
|
||||||
import planet, config
|
import planet, config
|
||||||
|
|
||||||
@ -139,25 +138,33 @@ def content(xentry, name, detail, bozo):
|
|||||||
xdiv = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>'
|
xdiv = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>'
|
||||||
xdoc = xentry.ownerDocument
|
xdoc = xentry.ownerDocument
|
||||||
xcontent = xdoc.createElement(name)
|
xcontent = xdoc.createElement(name)
|
||||||
|
|
||||||
if isinstance(detail.value,unicode):
|
if isinstance(detail.value,unicode):
|
||||||
detail.value=detail.value.encode('utf-8')
|
detail.value=detail.value.encode('utf-8')
|
||||||
|
|
||||||
parser = liberalxmlparser.XHTMLParser(tree=treebuilders.dom.TreeBuilder)
|
if not detail.has_key('type') or detail.type.lower().find('html')<0:
|
||||||
html = parser.parse(xdiv % detail.value, encoding="utf-8")
|
detail['value'] = escape(detail.value)
|
||||||
for body in html.documentElement.childNodes:
|
detail['type'] = 'text/html'
|
||||||
if body.nodeType != Node.ELEMENT_NODE: continue
|
|
||||||
if body.nodeName != 'body': continue
|
if detail.type.find('xhtml')>=0 and not bozo:
|
||||||
for div in body.childNodes:
|
data = minidom.parseString(xdiv % detail.value).documentElement
|
||||||
if div.nodeType != Node.ELEMENT_NODE: continue
|
else:
|
||||||
if div.nodeName != 'div': continue
|
parser = liberalxmlparser.XHTMLParser(tree=treebuilders.dom.TreeBuilder)
|
||||||
div.normalize()
|
html = parser.parse(xdiv % detail.value, encoding="utf-8")
|
||||||
if len(div.childNodes) == 1 and \
|
for body in html.documentElement.childNodes:
|
||||||
div.firstChild.nodeType == Node.TEXT_NODE:
|
if body.nodeType != Node.ELEMENT_NODE: continue
|
||||||
data = div.firstChild
|
if body.nodeName != 'body': continue
|
||||||
else:
|
for div in body.childNodes:
|
||||||
data = div
|
if div.nodeType != Node.ELEMENT_NODE: continue
|
||||||
xcontent.setAttribute('type', 'xhtml')
|
if div.nodeName != 'div': continue
|
||||||
break
|
div.normalize()
|
||||||
|
if len(div.childNodes) == 1 and \
|
||||||
|
div.firstChild.nodeType == Node.TEXT_NODE:
|
||||||
|
data = div.firstChild
|
||||||
|
else:
|
||||||
|
data = div
|
||||||
|
xcontent.setAttribute('type', 'xhtml')
|
||||||
|
break
|
||||||
|
|
||||||
if data: xcontent.appendChild(data)
|
if data: xcontent.appendChild(data)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user