resync with html5lib (includes improved <pre> support)
This commit is contained in:
parent
32a1c49090
commit
bc33615ced
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
# Differences from the current specification (23 December 2006) are as follows:
|
# Differences from the current specification (23 December 2006) are as follows:
|
||||||
# * Phases and insertion modes are one concept in parser.py.
|
# * Phases and insertion modes are one concept in parser.py.
|
||||||
# * EOF handling is slightly different to make sure <html>, <head> and <body>
|
# * EOF handling is slightly different to make sure <html>, <head> and <body>
|
||||||
@ -553,6 +554,10 @@ class InBodyPhase(Phase):
|
|||||||
# the crazy mode
|
# the crazy mode
|
||||||
def __init__(self, parser, tree):
|
def __init__(self, parser, tree):
|
||||||
Phase.__init__(self, parser, tree)
|
Phase.__init__(self, parser, tree)
|
||||||
|
|
||||||
|
#Keep a ref to this for special handling of whitespace in <pre>
|
||||||
|
self.processSpaceCharactersNonPre = self.processSpaceCharacters
|
||||||
|
|
||||||
self.startTagHandler = utils.MethodDispatcher([
|
self.startTagHandler = utils.MethodDispatcher([
|
||||||
("html", self.startTagHtml),
|
("html", self.startTagHtml),
|
||||||
(("script", "style"), self.startTagScriptStyle),
|
(("script", "style"), self.startTagScriptStyle),
|
||||||
@ -622,6 +627,15 @@ class InBodyPhase(Phase):
|
|||||||
self.tree.openElements[-1])
|
self.tree.openElements[-1])
|
||||||
|
|
||||||
# the real deal
|
# the real deal
|
||||||
|
def processSpaceCharactersPre(self, data):
    """Handle the first run of space characters seen after a <pre> start tag.

    This handler is single-use: it immediately reinstates the regular
    whitespace handler (``processSpaceCharactersNonPre``) and then inserts
    *data* into the tree, minus one leading newline when the current node
    is a still-empty <pre> element.
    """
    # Hand control back to the normal handler before doing anything else.
    self.processSpaceCharacters = self.processSpaceCharactersNonPre

    if data.startswith("\n"):
        current = self.tree.openElements[-1]
        # Only a newline that directly follows the <pre> start tag is dropped.
        if current.name == "pre" and not current.hasContent():
            data = data[1:]

    if not data:
        return
    self.tree.insertText(data)
|
||||||
|
|
||||||
def processCharacters(self, data):
|
def processCharacters(self, data):
|
||||||
# XXX The specification says to do this for every character at the
|
# XXX The specification says to do this for every character at the
|
||||||
# moment, but apparently that doesn't match the real world so we don't
|
# moment, but apparently that doesn't match the real world so we don't
|
||||||
@ -651,6 +665,8 @@ class InBodyPhase(Phase):
|
|||||||
if self.tree.elementInScope("p"):
|
if self.tree.elementInScope("p"):
|
||||||
self.endTagP("p")
|
self.endTagP("p")
|
||||||
self.tree.insertElement(name, attributes)
|
self.tree.insertElement(name, attributes)
|
||||||
|
if name == "pre":
|
||||||
|
self.processSpaceCharacters = self.processSpaceCharactersPre
|
||||||
|
|
||||||
def startTagForm(self, name, attributes):
|
def startTagForm(self, name, attributes):
|
||||||
if self.tree.formPointer:
|
if self.tree.formPointer:
|
||||||
@ -849,6 +865,9 @@ class InBodyPhase(Phase):
|
|||||||
self.parser.phase.processEndTag(name)
|
self.parser.phase.processEndTag(name)
|
||||||
|
|
||||||
def endTagBlock(self, name):
|
def endTagBlock(self, name):
|
||||||
|
#Put us back in the right whitespace handling mode
|
||||||
|
if name == "pre":
|
||||||
|
self.processSpaceCharacters = self.processSpaceCharactersNonPre
|
||||||
inScope = self.tree.elementInScope(name)
|
inScope = self.tree.elementInScope(name)
|
||||||
if inScope:
|
if inScope:
|
||||||
self.tree.generateImpliedEndTags()
|
self.tree.generateImpliedEndTags()
|
||||||
|
@ -11,11 +11,6 @@ References:
|
|||||||
* http://wiki.whatwg.org/wiki/HtmlVsXhtml
|
* http://wiki.whatwg.org/wiki/HtmlVsXhtml
|
||||||
|
|
||||||
@@TODO:
|
@@TODO:
|
||||||
* Produce SAX events based on the produced DOM. This is intended not to
|
|
||||||
support streaming, but rather to support application level compatibility.
|
|
||||||
* Optional namespace support
|
|
||||||
* Investigate the use of <![CDATA[]]> when tokenizer.contentModelFlag
|
|
||||||
indicates CDATA processing to ensure dual HTML/XHTML compatibility.
|
|
||||||
* Selectively lowercase only XHTML, but not foreign markup
|
* Selectively lowercase only XHTML, but not foreign markup
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -50,6 +45,13 @@ class XMLParser(html5parser.HTMLParser):
|
|||||||
if token["data"]:
|
if token["data"]:
|
||||||
self.parseError(_("End tag contains unexpected attributes."))
|
self.parseError(_("End tag contains unexpected attributes."))
|
||||||
|
|
||||||
|
elif token["type"] == "Comment":
|
||||||
|
# Rescue CDATA from the comments
|
||||||
|
if (token["data"].startswith("[CDATA[") and
|
||||||
|
token["data"].endswith("]]")):
|
||||||
|
token["type"] = "Characters"
|
||||||
|
token["data"] = token["data"][7:-2]
|
||||||
|
|
||||||
return token
|
return token
|
||||||
|
|
||||||
class XHTMLParser(XMLParser):
|
class XHTMLParser(XMLParser):
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import _base
|
import _base
|
||||||
from xml.dom import minidom, Node
|
from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
|
||||||
|
import new
|
||||||
|
|
||||||
import re
|
import re
|
||||||
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
|
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")
|
||||||
@ -71,6 +72,10 @@ class NodeBuilder(_base.Node):
|
|||||||
class TreeBuilder(_base.TreeBuilder):
|
class TreeBuilder(_base.TreeBuilder):
|
||||||
def documentClass(self):
|
def documentClass(self):
|
||||||
self.dom = minidom.getDOMImplementation().createDocument(None,None,None)
|
self.dom = minidom.getDOMImplementation().createDocument(None,None,None)
|
||||||
|
def hilite(self, encoding):
|
||||||
|
print 'foo'
|
||||||
|
method = new.instancemethod(hilite, self.dom, self.dom.__class__)
|
||||||
|
setattr(self.dom, 'hilite', method)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def doctypeClass(self,name):
|
def doctypeClass(self,name):
|
||||||
@ -129,3 +134,58 @@ def testSerializer(element):
|
|||||||
serializeElement(element, 0)
|
serializeElement(element, 0)
|
||||||
|
|
||||||
return "\n".join(rv)
|
return "\n".join(rv)
|
||||||
|
|
||||||
|
def _is_namespace_declaration(attr):
    """Return True when *attr* is an ``xmlns`` or ``xmlns:prefix`` declaration."""
    if attr.namespaceURI == XMLNS_NAMESPACE:
        return True
    if attr.namespaceURI is not None:
        return False
    # Match the exact declaration syntax only; an ordinary attribute whose
    # name merely begins with "xmlns" (e.g. "xmlnsfoo") is not a declaration.
    name = attr.nodeName
    return name == 'xmlns' or name.startswith('xmlns:')


def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
    """Walk the DOM subtree rooted at *node* and replay it as SAX events.

    node    -- a DOM node; element, text, CDATA and document nodes produce
               events, every other node type is silently skipped.
    handler -- object receiving the SAX callbacks (startDocument,
               startElement/startElementNS, characters, ...).
    nsmap   -- mapping of namespace prefix to URI currently in scope, with
               the key None holding the default namespace.  An empty mapping
               switches to non-namespace events (startElement/endElement).
               The mapping passed in (including the default) is never
               modified: it is copied before declarations are added.
    """
    if node.nodeType == Node.ELEMENT_NODE:
        if not nsmap:
            # Namespace processing disabled: emit plain element events.
            handler.startElement(node.nodeName, node.attributes)
            for child in node.childNodes:
                dom2sax(child, handler, nsmap)
            handler.endElement(node.nodeName)
            return

        attrs = [node.getAttributeNode(attrname)
                 for attrname in node.attributes.keys()]

        # gather namespace declarations (they must all be known before the
        # remaining attributes of this same element can be resolved)
        prefixes = []
        ordinary = []
        scope = nsmap
        for attr in attrs:
            if not _is_namespace_declaration(attr):
                ordinary.append(attr)
                continue
            prefix = attr.localName
            if prefix == 'xmlns' or not prefix:
                prefix = None           # default namespace declaration
            handler.startPrefixMapping(prefix, attr.nodeValue)
            prefixes.append(prefix)
            if scope is nsmap:
                scope = nsmap.copy()    # keep the caller's mapping intact
            scope[prefix] = attr.nodeValue

        # apply namespace declarations to prefixed, namespace-less attributes
        attributes = {}
        for attr in ordinary:
            uri = attr.namespaceURI
            if uri is None and ':' in attr.nodeName:
                prefix = attr.nodeName.split(':')[0]
                if prefix in scope:
                    uri = scope[prefix]
            attributes[(uri, attr.localName)] = attr.nodeValue

        # SAX events
        ns = node.namespaceURI or scope.get(None, None)
        handler.startElementNS((ns, node.nodeName), node.nodeName, attributes)
        for child in node.childNodes:
            dom2sax(child, handler, scope)
        handler.endElementNS((ns, node.nodeName), node.nodeName)
        for prefix in prefixes:
            handler.endPrefixMapping(prefix)

    elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
        handler.characters(node.nodeValue)

    elif node.nodeType == Node.DOCUMENT_NODE:
        handler.startDocument()
        for child in node.childNodes:
            dom2sax(child, handler, nsmap)
        handler.endDocument()

    else:
        # ATTRIBUTE_NODE
        # ENTITY_NODE
        # PROCESSING_INSTRUCTION_NODE
        # COMMENT_NODE
        # DOCUMENT_TYPE_NODE
        # NOTATION_NODE
        pass
|
||||||
|
@ -1,208 +1,5 @@
|
|||||||
try:
|
import etreefull
|
||||||
from xml.etree import ElementTree
|
|
||||||
except ImportError:
|
|
||||||
from elementtree import ElementTree
|
|
||||||
|
|
||||||
import _base
|
|
||||||
|
|
||||||
class Element(_base.Node):
|
|
||||||
def __init__(self, name):
|
|
||||||
self._element = ElementTree.Element(name)
|
|
||||||
self.name = name
|
|
||||||
self.parent = None
|
|
||||||
self._childNodes = []
|
|
||||||
self._flags = []
|
|
||||||
|
|
||||||
#Set the element text and tail to the empty string rather than None
|
|
||||||
#XXX - is this desirable or should we do it on a case by case basis?
|
|
||||||
self._element.text = ""
|
|
||||||
self._element.tail = ""
|
|
||||||
|
|
||||||
def _setName(self, name):
|
|
||||||
self._element.tag = name
|
|
||||||
|
|
||||||
def _getName(self):
|
|
||||||
return self._element.tag
|
|
||||||
|
|
||||||
name = property(_getName, _setName)
|
|
||||||
|
|
||||||
def _getAttributes(self):
|
|
||||||
return self._element.attrib
|
|
||||||
|
|
||||||
def _setAttributes(self, attributes):
|
|
||||||
#Delete existing attributes first
|
|
||||||
#XXX - there may be a better way to do this...
|
|
||||||
for key in self._element.attrib.keys():
|
|
||||||
del self._element.attrib[key]
|
|
||||||
for key, value in attributes.iteritems():
|
|
||||||
self._element.set(key, value)
|
|
||||||
|
|
||||||
attributes = property(_getAttributes, _setAttributes)
|
|
||||||
|
|
||||||
def _getChildNodes(self):
|
|
||||||
return self._childNodes
|
|
||||||
|
|
||||||
def _setChildNodes(self, value):
|
|
||||||
del self._element[:]
|
|
||||||
self._childNodes = []
|
|
||||||
for element in value:
|
|
||||||
self.insertChild(element)
|
|
||||||
|
|
||||||
childNodes = property(_getChildNodes, _setChildNodes)
|
|
||||||
|
|
||||||
def hasContent(self):
|
|
||||||
"""Return true if the node has children or text"""
|
|
||||||
return bool(self._element.text or self._element.getchildren())
|
|
||||||
|
|
||||||
def appendChild(self, node):
|
|
||||||
self._childNodes.append(node)
|
|
||||||
self._element.append(node._element)
|
|
||||||
node.parent = self
|
|
||||||
|
|
||||||
def insertBefore(self, node, refNode):
|
|
||||||
index = self._element.getchildren().index(refNode._element)
|
|
||||||
self._element.insert(index, node._element)
|
|
||||||
node.parent = self
|
|
||||||
|
|
||||||
def removeChild(self, node):
|
|
||||||
self._element.remove(node._element)
|
|
||||||
node.parent=None
|
|
||||||
|
|
||||||
def insertText(self, data, insertBefore=None):
|
|
||||||
if not(len(self._element)):
|
|
||||||
self._element.text += data
|
|
||||||
elif insertBefore is None:
|
|
||||||
#Insert the text as the tail of the last child element
|
|
||||||
self._element[-1].tail += data
|
|
||||||
else:
|
|
||||||
#Insert the text before the specified node
|
|
||||||
children = self._element.getchildren()
|
|
||||||
index = children.index(insertBefore._element)
|
|
||||||
if index > 0:
|
|
||||||
self._element[index-1].tail += data
|
|
||||||
else:
|
|
||||||
self._element.text += data
|
|
||||||
|
|
||||||
def cloneNode(self):
|
|
||||||
element = Element(self.name)
|
|
||||||
element.attributes = self.attributes
|
|
||||||
return element
|
|
||||||
|
|
||||||
def reparentChildren(self, newParent):
|
|
||||||
if newParent.childNodes:
|
|
||||||
newParent.childNodes[-1]._element.tail += self._element.text
|
|
||||||
else:
|
|
||||||
newParent._element.text += self._element.text
|
|
||||||
self._element.text = ""
|
|
||||||
_base.Node.reparentChildren(self, newParent)
|
|
||||||
|
|
||||||
class Comment(Element):
|
|
||||||
def __init__(self, data):
|
|
||||||
Element.__init__(self, Comment)
|
|
||||||
self._element.text = data
|
|
||||||
|
|
||||||
def _getData(self):
|
|
||||||
return self._element.text
|
|
||||||
|
|
||||||
def _setData(self, value):
|
|
||||||
self._element.text = value
|
|
||||||
|
|
||||||
data = property(_getData, _setData)
|
|
||||||
|
|
||||||
class DocumentType(Element):
|
|
||||||
def __init__(self, name):
|
|
||||||
Element.__init__(self, DocumentType)
|
|
||||||
self._element.text = name
|
|
||||||
|
|
||||||
class Document(Element):
|
|
||||||
def __init__(self):
|
|
||||||
Element.__init__(self, Document)
|
|
||||||
|
|
||||||
def testSerializer(element):
|
|
||||||
rv = []
|
|
||||||
finalText = None
|
|
||||||
def serializeElement(element, indent=0):
|
|
||||||
if element.tag is DocumentType:
|
|
||||||
rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.text))
|
|
||||||
elif element.tag is Document:
|
|
||||||
rv.append("#document")
|
|
||||||
if element.text:
|
|
||||||
rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
|
|
||||||
if element.tail:
|
|
||||||
finalText = element.tail
|
|
||||||
elif element.tag is Comment:
|
|
||||||
rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
|
|
||||||
else:
|
|
||||||
rv.append("|%s<%s>"%(' '*indent, element.tag))
|
|
||||||
if hasattr(element, "attrib"):
|
|
||||||
for name, value in element.attrib.iteritems():
|
|
||||||
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
|
|
||||||
if element.text:
|
|
||||||
rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
|
|
||||||
indent += 2
|
|
||||||
for child in element.getchildren():
|
|
||||||
serializeElement(child, indent)
|
|
||||||
if element.tail:
|
|
||||||
rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
|
|
||||||
serializeElement(element, 0)
|
|
||||||
|
|
||||||
if finalText is not None:
|
|
||||||
rv.append("|%s\"%s\""%(' '*2, finalText))
|
|
||||||
|
|
||||||
return "\n".join(rv)
|
|
||||||
|
|
||||||
def tostring(element):
|
|
||||||
"""Serialize an element and its child nodes to a string"""
|
|
||||||
rv = []
|
|
||||||
finalText = None
|
|
||||||
def serializeElement(element):
|
|
||||||
if element.tag is DocumentType:
|
|
||||||
rv.append("<!DOCTYPE %s>"%(element.text,))
|
|
||||||
elif element.tag is Document:
|
|
||||||
if element.text:
|
|
||||||
rv.append(element.text)
|
|
||||||
if element.tail:
|
|
||||||
finalText = element.tail
|
|
||||||
|
|
||||||
for child in element.getchildren():
|
|
||||||
serializeElement(child)
|
|
||||||
|
|
||||||
elif element.tag is Comment:
|
|
||||||
rv.append("<!--%s-->"%(element.text,))
|
|
||||||
else:
|
|
||||||
#This is assumed to be an ordinary element
|
|
||||||
if not element.attrib:
|
|
||||||
rv.append("<%s>"%(element.tag,))
|
|
||||||
else:
|
|
||||||
attr = " ".join(["%s=\"%s\""%(name, value)
|
|
||||||
for name, value in element.attrib.iteritems()])
|
|
||||||
rv.append("<%s %s>"%(element.tag, attr))
|
|
||||||
if element.text:
|
|
||||||
rv.append(element.text)
|
|
||||||
|
|
||||||
for child in element.getchildren():
|
|
||||||
serializeElement(child)
|
|
||||||
|
|
||||||
rv.append("</%s>"%(element.tag,))
|
|
||||||
|
|
||||||
if element.tail:
|
|
||||||
rv.append(element.tail)
|
|
||||||
|
|
||||||
serializeElement(element)
|
|
||||||
|
|
||||||
if finalText is not None:
|
|
||||||
rv.append("%s\""%(' '*2, finalText))
|
|
||||||
|
|
||||||
return "".join(rv)
|
|
||||||
|
|
||||||
class TreeBuilder(_base.TreeBuilder):
|
|
||||||
documentClass = Document
|
|
||||||
doctypeClass = DocumentType
|
|
||||||
elementClass = Element
|
|
||||||
commentClass = Comment
|
|
||||||
|
|
||||||
def testSerializer(self, element):
|
|
||||||
return testSerializer(element)
|
|
||||||
|
|
||||||
|
class TreeBuilder(etreefull.TreeBuilder):
|
||||||
def getDocument(self):
|
def getDocument(self):
|
||||||
return self.document._element
|
return self.document._element.find("html")
|
||||||
|
216
planet/html5lib/treebuilders/etreefull.py
Normal file
216
planet/html5lib/treebuilders/etreefull.py
Normal file
@ -0,0 +1,216 @@
|
|||||||
|
try:
|
||||||
|
from xml.etree import ElementTree
|
||||||
|
except ImportError:
|
||||||
|
from elementtree import ElementTree
|
||||||
|
|
||||||
|
import _base
|
||||||
|
|
||||||
|
class Element(_base.Node):
    """Tree-builder node that wraps an ``ElementTree.Element``.

    The wrapped element is kept in ``self._element``; the wrapper tracks
    the parent wrapper and the list of child wrappers alongside it.
    """

    def __init__(self, name):
        self._element = ElementTree.Element(name)
        self.name = name
        self.parent = None
        self._childNodes = []
        self._flags = []

    def _setName(self, name):
        self._element.tag = name

    def _getName(self):
        return self._element.tag

    # The node name is simply the tag of the wrapped element.
    name = property(_getName, _setName)

    def _getAttributes(self):
        return self._element.attrib

    def _setAttributes(self, attributes):
        # Snapshot first so that assigning an element's own attribute
        # mapping back to itself does not wipe it, then replace wholesale.
        # (Deleting keys while iterating over attrib.keys() fails where
        # keys() is a live view.)
        replacement = list(attributes.items())
        self._element.attrib.clear()
        for key, value in replacement:
            self._element.set(key, value)

    attributes = property(_getAttributes, _setAttributes)

    def _getChildNodes(self):
        return self._childNodes

    def _setChildNodes(self, value):
        del self._element[:]
        self._childNodes = []
        for element in value:
            # NOTE(review): insertChild is not defined in this class;
            # presumably inherited from _base.Node - confirm it exists.
            self.insertChild(element)

    childNodes = property(_getChildNodes, _setChildNodes)

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self._element.text or len(self._element))

    def appendChild(self, node):
        """Append *node* as the last child of this element."""
        self._childNodes.append(node)
        self._element.append(node._element)
        node.parent = self

    def insertBefore(self, node, refNode):
        """Insert *node* immediately before the existing child *refNode*."""
        index = list(self._element).index(refNode._element)
        self._element.insert(index, node._element)
        node.parent = self

    def removeChild(self, node):
        """Detach *node* from the wrapped element and clear its parent."""
        self._element.remove(node._element)
        node.parent=None

    def insertText(self, data, insertBefore=None):
        """Add character *data* to this element.

        ElementTree has no text nodes, so the data is appended to this
        element's ``text`` (no preceding child) or to the ``tail`` of the
        child that precedes the insertion point.  A missing text/tail
        (None) is treated as the empty string.
        """
        if not len(self._element):
            self._element.text = (self._element.text or "") + data
        elif insertBefore is None:
            #Insert the text as the tail of the last child element
            last = self._element[-1]
            last.tail = (last.tail or "") + data
        else:
            #Insert the text before the specified node
            index = list(self._element).index(insertBefore._element)
            if index > 0:
                previous = self._element[index-1]
                previous.tail = (previous.tail or "") + data
            else:
                self._element.text = (self._element.text or "") + data

    def cloneNode(self):
        """Return a new Element with the same name and attributes (no children)."""
        element = Element(self.name)
        element.attributes = self.attributes
        return element

    def reparentChildren(self, newParent):
        """Move this element's leading text and its children to *newParent*."""
        # text and tail start out as None on a fresh ElementTree element,
        # so normalise before concatenating instead of using a bare "+=".
        moved = self._element.text or ""
        if newParent.childNodes:
            last = newParent.childNodes[-1]._element
            last.tail = (last.tail or "") + moved
        else:
            newParent._element.text = (newParent._element.text or "") + moved
        self._element.text = ""
        _base.Node.reparentChildren(self, newParent)
|
||||||
|
|
||||||
|
class Comment(Element):
    """Tree-builder node for a comment, backed by ``ElementTree.Comment``."""

    def __init__(self, data):
        #Use the superclass constructor to set all properties on the
        #wrapper element
        Element.__init__(self, None)
        # Replace the placeholder element created by the superclass with a
        # real comment node carrying *data*.
        self._element = ElementTree.Comment(data)

    def _getData(self):
        # The comment text lives in the wrapped node's text.
        return self._element.text

    def _setData(self, value):
        self._element.text = value

    # Expose the comment text as the ``data`` attribute.
    data = property(_getData, _setData)
|
||||||
|
|
||||||
|
class DocumentType(Element):
    """Tree-builder node for a doctype.

    The DocumentType class object itself is used as the wrapped element's
    tag (the serializers in this file test ``element.tag is DocumentType``);
    the doctype name is stored as the element's text.
    """

    def __init__(self, name):
        Element.__init__(self, DocumentType)
        self._element.text = name
|
||||||
|
|
||||||
|
class Document(Element):
    """Tree-builder node for the document root.

    The Document class object itself is used as the wrapped element's tag
    (the serializers in this file test ``element.tag is Document``).
    """

    def __init__(self):
        Element.__init__(self, Document)
|
||||||
|
|
||||||
|
def testSerializer(element):
    """Render *element* and its subtree in the indented test-dump format.

    Every line except the ``#document`` marker starts with ``|`` followed
    by two spaces of indentation per nesting level.  Text is written in
    double quotes, attributes as ``name="value"`` one level below their
    element, and an element's tail text at the element's own level after
    its children.  Returns the lines joined with newlines.
    """
    rv = []

    def serializeElement(element, indent=0):
        pad = ' ' * indent
        inner = ' ' * (indent + 2)
        if element.tag is DocumentType:
            rv.append("|%s<!DOCTYPE %s>" % (pad, element.text))
        elif element.tag is Document:
            rv.append("#document")
            if element.text:
                rv.append("|%s\"%s\"" % (inner, element.text))
            # The document's tail needs no special casing: it is emitted
            # by the generic tail handling below.  (The earlier attempt to
            # stash it in an outer "finalText" only bound a local name.)
        elif element.tag is ElementTree.Comment:
            rv.append("|%s<!-- %s -->" % (pad, element.text))
        else:
            rv.append("|%s<%s>" % (pad, element.tag))
            if hasattr(element, "attrib"):
                for name, value in element.attrib.items():
                    rv.append('|%s%s="%s"' % (inner, name, value))
            if element.text:
                rv.append("|%s\"%s\"" % (inner, element.text))
        for child in list(element):
            serializeElement(child, indent + 2)
        if element.tail:
            rv.append("|%s\"%s\"" % (pad, element.tail))

    serializeElement(element, 0)

    return "\n".join(rv)
|
||||||
|
|
||||||
|
def tostring(element):
    """Serialize an element and its child nodes to a string

    Doctype nodes become ``<!DOCTYPE name>``, comments ``<!--text-->``,
    and ordinary elements a start tag (with ``name="value"`` attributes),
    their text, their children and an end tag.  Any node's tail text is
    written directly after the node.  No escaping is performed.
    """
    rv = []

    def serializeElement(element):
        tag = element.tag
        if tag is DocumentType:
            rv.append("<!DOCTYPE %s>" % (element.text,))
        elif tag is Document:
            if element.text:
                rv.append(element.text)
            # The document's tail is written by the generic tail handling
            # below; the earlier "finalText" bookkeeping only bound a
            # local name and guarded a malformed format string.
            for child in list(element):
                serializeElement(child)
        elif tag is ElementTree.Comment:
            rv.append("<!--%s-->" % (element.text,))
        else:
            #This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>" % (tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (name, value)
                                 for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (tag, attr))
            if element.text:
                rv.append(element.text)

            for child in list(element):
                serializeElement(child)

            rv.append("</%s>" % (tag,))

        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
|
||||||
|
|
||||||
|
class TreeBuilder(_base.TreeBuilder):
    """Tree builder that produces the ElementTree-backed nodes of this module."""

    # Node classes used for each kind of node.
    # NOTE(review): presumably instantiated by _base.TreeBuilder - confirm.
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = Comment

    def testSerializer(self, element):
        # Delegate to the module-level testSerializer function.
        return testSerializer(element)

    def getDocument(self):
        # Return the wrapped ElementTree element of the document node.
        # NOTE(review): self.document is not set in this class; presumably
        # provided by _base.TreeBuilder - confirm.
        return self.document._element
|
Loading…
x
Reference in New Issue
Block a user