2007-04-30 09:38:09 -04:00

231 lines
8.2 KiB
Python
Executable File

import _base
from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
import new
from xml.sax.saxutils import escape
from constants import voidElements
import re
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
class AttrList:
def __init__(self, element):
self.element = element
def __iter__(self):
return self.element.attributes.items().__iter__()
def __setitem__(self, name, value):
value=illegal_xml_chars.sub(u'\uFFFD',value)
self.element.setAttribute(name, value)
def items(self):
return self.element.attributes.items()
def keys(self):
return self.element.attributes.keys()
def __getitem__(self, name):
return self.element.getAttribute(name)
class NodeBuilder(_base.Node):
def __init__(self, element):
_base.Node.__init__(self, element.nodeName)
self.element = element
def appendChild(self, node):
node.parent = self
self.element.appendChild(node.element)
def insertText(self, data, insertBefore=None):
data=illegal_xml_chars.sub(u'\uFFFD',data)
text = self.element.ownerDocument.createTextNode(data)
if insertBefore:
self.element.insertBefore(text, insertBefore.element)
else:
self.element.appendChild(text)
def insertBefore(self, node, refNode):
self.element.insertBefore(node.element, refNode.element)
node.parent = self
def removeChild(self, node):
self.element.removeChild(node.element)
node.parent = None
def reparentChildren(self, newParent):
while self.element.hasChildNodes():
child = self.element.firstChild
self.element.removeChild(child)
newParent.element.appendChild(child)
self.childNodes = []
def getAttributes(self):
return AttrList(self.element)
def setAttributes(self, attributes):
if attributes:
for name, value in attributes.items():
value=illegal_xml_chars.sub(u'\uFFFD',value)
self.element.setAttribute(name, value)
attributes = property(getAttributes, setAttributes)
def cloneNode(self):
return NodeBuilder(self.element.cloneNode(False))
def hasContent(self):
return self.element.hasChildNodes()
class TreeBuilder(_base.TreeBuilder):
def documentClass(self):
self.dom = minidom.getDOMImplementation().createDocument(None,None,None)
def hilite(self, encoding):
print 'foo'
method = new.instancemethod(hilite, self.dom, self.dom.__class__)
setattr(self.dom, 'hilite', method)
return self
def doctypeClass(self,name):
domimpl = minidom.getDOMImplementation()
return NodeBuilder(domimpl.createDocumentType(name,None,None))
def elementClass(self, name):
return NodeBuilder(self.dom.createElement(name))
def commentClass(self, data):
return NodeBuilder(self.dom.createComment(data))
def fragmentClass(self):
return NodeBuilder(self.dom.createDocumentFragment())
def appendChild(self, node):
self.dom.appendChild(node.element)
def testSerializer(self, element):
return testSerializer(element)
def getDocument(self):
return self.dom
def getFragment(self):
return _base.TreeBuilder.getFragment(self).element
def insertText(self, data, parent=None):
data=illegal_xml_chars.sub(u'\uFFFD',data)
if parent <> self:
_base.TreeBuilder.insertText(self, data, parent)
else:
# HACK: allow text nodes as children of the document node
if hasattr(self.dom, '_child_node_types'):
if not Node.TEXT_NODE in self.dom._child_node_types:
self.dom._child_node_types=list(self.dom._child_node_types)
self.dom._child_node_types.append(Node.TEXT_NODE)
self.dom.appendChild(self.dom.createTextNode(data))
name = None
def testSerializer(element):
element.normalize()
rv = []
def serializeElement(element, indent=0):
if element.nodeType == Node.DOCUMENT_TYPE_NODE:
rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.name))
elif element.nodeType == Node.DOCUMENT_NODE:
rv.append("#document")
elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
rv.append("#document-fragment")
elif element.nodeType == Node.COMMENT_NODE:
rv.append("|%s<!-- %s -->"%(' '*indent, element.nodeValue))
elif element.nodeType == Node.TEXT_NODE:
rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue))
else:
rv.append("|%s<%s>"%(' '*indent, element.nodeName))
if element.hasAttributes():
for name, value in element.attributes.items():
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
indent += 2
for child in element.childNodes:
serializeElement(child, indent)
serializeElement(element, 0)
return "\n".join(rv)
class HTMLSerializer(object):
def serialize(self, node):
rv = self.serializeNode(node)
for child in node.childNodes:
rv += self.serialize(child)
if node.nodeType == Node.ELEMENT_NODE and node.nodeName not in voidElements:
rv += "</%s>\n"%node.nodeName
return rv
def serializeNode(self, node):
if node.nodeType == Node.TEXT_NODE:
rv = node.nodeValue
elif node.nodeType == Node.ELEMENT_NODE:
rv = "<%s"%node.nodeName
if node.hasAttributes():
rv = rv+"".join([" %s='%s'"%(key, escape(value)) for key,value in
node.attributes.items()])
rv += ">"
elif node.nodeType == Node.COMMENT_NODE:
rv = "<!-- %s -->" % escape(node.nodeValue)
elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
rv = "<!DOCTYPE %s>" % node.name
else:
rv = ""
return rv
def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
if node.nodeType == Node.ELEMENT_NODE:
if not nsmap:
handler.startElement(node.nodeName, node.attributes)
for child in node.childNodes: dom2sax(child, handler, nsmap)
handler.endElement(node.nodeName)
else:
attributes = dict(node.attributes.itemsNS())
# gather namespace declarations
prefixes = []
for attrname in node.attributes.keys():
attr = node.getAttributeNode(attrname)
if (attr.namespaceURI == XMLNS_NAMESPACE or
(attr.namespaceURI == None and attr.nodeName.startswith('xmlns'))):
prefix = (attr.localName != 'xmlns' and attr.localName or None)
handler.startPrefixMapping(prefix, attr.nodeValue)
prefixes.append(prefix)
nsmap = nsmap.copy()
nsmap[prefix] = attr.nodeValue
del attributes[(attr.namespaceURI, attr.localName)]
# apply namespace declarations
for attrname in node.attributes.keys():
attr = node.getAttributeNode(attrname)
if attr.namespaceURI == None and ':' in attr.nodeName:
prefix = attr.nodeName.split(':')[0]
if nsmap.has_key(prefix):
del attributes[(attr.namespaceURI, attr.localName)]
attributes[(nsmap[prefix],attr.localName)]=attr.nodeValue
# SAX events
ns = node.namespaceURI or nsmap.get(None,None)
handler.startElementNS((ns,node.nodeName), node.nodeName, attributes)
for child in node.childNodes: dom2sax(child, handler, nsmap)
handler.endElementNS((ns, node.nodeName), node.nodeName)
for prefix in prefixes: handler.endPrefixMapping(prefix)
elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
handler.characters(node.nodeValue)
elif node.nodeType == Node.DOCUMENT_NODE:
handler.startDocument()
for child in node.childNodes: dom2sax(child, handler, nsmap)
handler.endDocument()
elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
for child in node.childNodes: dom2sax(child, handler, nsmap)
else:
# ATTRIBUTE_NODE
# ENTITY_NODE
# PROCESSING_INSTRUCTION_NODE
# COMMENT_NODE
# DOCUMENT_TYPE_NODE
# NOTATION_NODE
pass