Properly handle content type text/plain

This commit is contained in:
Sam Ruby 2007-01-12 06:19:19 -05:00
parent 3024af031f
commit f2ac92465d

View File

@@ -16,7 +16,6 @@ Todo:
import re, time, md5, sgmllib import re, time, md5, sgmllib
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from xml.dom import minidom, Node from xml.dom import minidom, Node
from BeautifulSoup import BeautifulSoup
from planet.html5lib import liberalxmlparser, treebuilders from planet.html5lib import liberalxmlparser, treebuilders
import planet, config import planet, config
@@ -139,25 +138,33 @@ def content(xentry, name, detail, bozo):
xdiv = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>' xdiv = '<div xmlns="http://www.w3.org/1999/xhtml">%s</div>'
xdoc = xentry.ownerDocument xdoc = xentry.ownerDocument
xcontent = xdoc.createElement(name) xcontent = xdoc.createElement(name)
if isinstance(detail.value,unicode): if isinstance(detail.value,unicode):
detail.value=detail.value.encode('utf-8') detail.value=detail.value.encode('utf-8')
parser = liberalxmlparser.XHTMLParser(tree=treebuilders.dom.TreeBuilder) if not detail.has_key('type') or detail.type.lower().find('html')<0:
html = parser.parse(xdiv % detail.value, encoding="utf-8") detail['value'] = escape(detail.value)
for body in html.documentElement.childNodes: detail['type'] = 'text/html'
if body.nodeType != Node.ELEMENT_NODE: continue
if body.nodeName != 'body': continue if detail.type.find('xhtml')>=0 and not bozo:
for div in body.childNodes: data = minidom.parseString(xdiv % detail.value).documentElement
if div.nodeType != Node.ELEMENT_NODE: continue else:
if div.nodeName != 'div': continue parser = liberalxmlparser.XHTMLParser(tree=treebuilders.dom.TreeBuilder)
div.normalize() html = parser.parse(xdiv % detail.value, encoding="utf-8")
if len(div.childNodes) == 1 and \ for body in html.documentElement.childNodes:
div.firstChild.nodeType == Node.TEXT_NODE: if body.nodeType != Node.ELEMENT_NODE: continue
data = div.firstChild if body.nodeName != 'body': continue
else: for div in body.childNodes:
data = div if div.nodeType != Node.ELEMENT_NODE: continue
xcontent.setAttribute('type', 'xhtml') if div.nodeName != 'div': continue
break div.normalize()
if len(div.childNodes) == 1 and \
div.firstChild.nodeType == Node.TEXT_NODE:
data = div.firstChild
else:
data = div
xcontent.setAttribute('type', 'xhtml')
break
if data: xcontent.appendChild(data) if data: xcontent.appendChild(data)