diff --git a/planet/feedparser.py b/planet/feedparser.py
index 1860539..b3b2467 100755
--- a/planet/feedparser.py
+++ b/planet/feedparser.py
@@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
 Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
 """
 
-__version__ = "4.2-pre-" + "$Revision: 1.146 $"[11:16] + "-cvs"
+__version__ = "4.2-pre-" + "$Revision: 1.147 $"[11:16] + "-cvs"
 __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification,
@@ -2303,19 +2303,20 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
        'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd',
        'descent', 'display', 'dur', 'end', 'fill', 'fill-rule', 'font-family',
        'font-size', 'font-stretch', 'font-style', 'font-variant',
-       'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'hanging',
-       'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
-       'keyPoints', 'keySplines', 'keyTimes', 'lang', 'mathematical', 'max',
-       'min', 'name', 'offset', 'opacity', 'origin', 'overline-position',
-       'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
-       'preserveAspectRatio', 'r', 'repeatCount', 'repeatDur',
-       'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx',
-       'ry', 'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity',
-       'strikethrough-position', 'strikethrough-thickness', 'stroke',
-       'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
-       'stroke-linejoin', 'stroke-miterlimit', 'stroke-width',
-       'systemLanguage', 'target', 'text-anchor', 'to', 'transform', 'type',
-       'u1', 'u2', 'underline-position', 'underline-thickness', 'unicode',
+       'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 
+       'gradientUnits', 'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x',
+       'id', 'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
+       'lang', 'mathematical', 'max', 'min', 'name', 'offset', 'opacity',
+       'origin', 'overline-position', 'overline-thickness', 'panose-1',
+       'path', 'pathLength', 'points', 'preserveAspectRatio', 'r',
+       'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures',
+       'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 
+       'stop-color', 'stop-opacity', 'strikethrough-position',
+       'strikethrough-thickness', 'stroke', 'stroke-dasharray',
+       'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
+       'stroke-miterlimit', 'stroke-width', 'systemLanguage', 'target',
+       'text-anchor', 'to', 'transform', 'type', 'u1', 'u2',
+       'underline-position', 'underline-thickness', 'unicode',
        'unicode-range', 'units-per-em', 'values', 'version', 'viewBox',
        'visibility', 'width', 'widths', 'x', 'x-height', 'x1', 'x2',
        'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role',
@@ -3021,6 +3022,21 @@ _additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -
 rfc822._timezones.update(_additional_timezones)
 registerDateHandler(_parse_date_rfc822)    
 
+def _parse_date_perforce(aDateString):
+    """Parse a Perforce-style date, e.g. 'Fri, 2006/09/15 08:19:53 EDT'.
+
+    Returns a 9-tuple in GMT, or None if the string is not in this format."""
+    match = re.search(r'(\w{1,3}), (\d{1,4})/(\d{1,2})/(\d{2}) '
+                      r'(\d{1,2}):(\d{2}):(\d{2}) (\w{1,3})', aDateString)
+    if not match: return None # not this format; let other handlers try
+    dow, year, month, day, hour, minute, second, tz = match.groups()
+    if not 1 <= int(month) <= 12: return None # month 0 would wrap to 'Dec'
+    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+    dateString = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz)
+    tm = rfc822.parsedate_tz(dateString)
+    if tm: return time.gmtime(rfc822.mktime_tz(tm))
+registerDateHandler(_parse_date_perforce)
+
 def _parse_date(dateString):
     '''Parses a variety of date formats into a 9-tuple in GMT'''
     for handler in _date_handlers:
diff --git a/planet/html5lib/html5parser.py b/planet/html5lib/html5parser.py
index 6e1e70a..e075001 100644
--- a/planet/html5lib/html5parser.py
+++ b/planet/html5lib/html5parser.py
@@ -37,10 +37,10 @@ class HTMLParser(object):
 
     def __init__(self, strict = False, tree=simpletree.TreeBuilder):
         """
-        strict - raise an exception when a parse error is encountered 
-        
-        tree - a treebuilder class controlling the type of tree that will be 
-        returned. This class is almost always a subclass of 
+        strict - raise an exception when a parse error is encountered
+
+        tree - a treebuilder class controlling the type of tree that will be
+        returned. This class is almost always a subclass of
         html5lib.treebuilders._base.TreeBuilder
         """
 
@@ -72,10 +72,10 @@ class HTMLParser(object):
 
     def parse(self, stream, encoding=None, innerHTML=False):
         """Parse a HTML document into a well-formed tree
-        
+
         stream - a filelike object or string containing the HTML to be parsed
-        
-        innerHTML - Are we parsing in innerHTML mode (note innerHTML=True 
+
+        innerHTML - Are we parsing in innerHTML mode (note innerHTML=True
         is not yet supported)
 
         The optional encoding parameter must be a string that indicates
@@ -85,6 +85,7 @@ class HTMLParser(object):
         """
 
         self.tree.reset()
+        self.firstStartTag = False
         self.errors = []
 
         self.phase = self.phases["initial"]
@@ -119,8 +120,8 @@ class HTMLParser(object):
         return self.tree.getDocument()
 
     def parseError(self, data="XXX ERROR MESSAGE NEEDED"):
-        # The idea is to make data mandatory.
-        self.errors.append(data)
+        # XXX The idea is to make data mandatory.
+        self.errors.append((self.tokenizer.stream.position(), data))
         if self.strict:
             raise ParseError
 
@@ -130,7 +131,7 @@ class HTMLParser(object):
 
     def normalizeToken(self, token):
         """ HTML5 specific normalizations to the token stream """
-       
+
         if token["type"] == "EmptyTag":
             # When a solidus (/) is encountered within a tag name what happens
             # depends on whether the current tag name matches that of a void
@@ -159,14 +160,12 @@ class HTMLParser(object):
                 token["data"] = {}
 
         elif token["type"] == "EndTag":
+            if token["data"]:
+               self.parseError(_("End tag contains unexpected attributes."))
             token["name"] = token["name"].lower()
 
         return token
 
-    #XXX - almost everthing after this point should be moved into a
-    #seperate treebuilder object
-
-
     def resetInsertionMode(self):
         # The name of this method is mostly historical. (It's also used in the
         # specification.)
@@ -231,13 +230,19 @@ class Phase(object):
 
     def processEOF(self):
         self.tree.generateImpliedEndTags()
-        if self.parser.innerHTML == True and len(self.tree.openElements) > 1:
-            # XXX No need to check for "body" because our EOF handling is not
-            # per specification. (Specification needs an update.)
-            #
-            # XXX Need to check this more carefully in the future.
-            self.parser.parseError()
-        # Stop parsing
+        if len(self.tree.openElements) > 2:
+            self.parser.parseError(_("Unexpected end of file. "
+              u"Missing closing tags."))
+        elif len(self.tree.openElements) == 2 and\
+          self.tree.openElements[1].name != "body":
+            # This happens for framesets or something?
+            self.parser.parseError(_("Unexpected end of file. Expected end "
+              u"tag (" + self.tree.openElements[1].name + u") first."))
+        elif self.parser.innerHTML and len(self.tree.openElements) > 1 :
+            # XXX This is not what the specification says. Not sure what to do
+            # here.
+            self.parser.parseError(_("XXX innerHTML EOF"))
+        # Betting ends.
 
     def processComment(self, data):
         # For most phases the following is correct. Where it's not it will be
@@ -245,7 +250,7 @@ class Phase(object):
         self.tree.insertComment(data, self.tree.openElements[-1])
 
     def processDoctype(self, name, error):
-        self.parser.parseError()
+        self.parser.parseError(_("Unexpected DOCTYPE. Ignored."))
 
     def processSpaceCharacters(self, data):
         self.tree.insertText(data)
@@ -254,11 +259,14 @@ class Phase(object):
         self.startTagHandler[name](name, attributes)
 
     def startTagHtml(self, name, attributes):
+        if not self.parser.firstStartTag and name == "html":
+            self.parser.parseError(_("html needs to be the first start tag."))
         # XXX Need a check here to see if the first start tag token emitted is
         # this token... If it's not, invoke self.parser.parseError().
         for attr, value in attributes.iteritems():
             if attr not in self.tree.openElements[0].attributes:
                 self.tree.openElements[0].attributes[attr] = value
+        self.parser.firstStartTag = False # later html tags hit the check above
 
     def processEndTag(self, name):
         self.endTagHandler[name](name)
@@ -270,7 +278,7 @@ class InitialPhase(Phase):
     # "quirks mode". It is expected that a future version of HTML5 will defin
     # this.
     def processEOF(self):
-        self.parser.parseError(_("No DOCTYPE seen."))
+        self.parser.parseError(_(u"Unexpected End of file. Expected DOCTYPE."))
         self.parser.phase = self.parser.phases["rootElement"]
         self.parser.phase.processEOF()
 
@@ -279,7 +287,7 @@ class InitialPhase(Phase):
 
     def processDoctype(self, name, error):
         if error:
-            self.parser.parseError(_("DOCTYPE is in error."))
+            self.parser.parseError(_("Erroneous DOCTYPE."))
         self.tree.insertDoctype(name)
         self.parser.phase = self.parser.phases["rootElement"]
 
@@ -287,17 +295,20 @@ class InitialPhase(Phase):
         self.tree.insertText(data, self.tree.document)
 
     def processCharacters(self, data):
-        self.parser.parseError(_("No DOCTYPE seen."))
+        self.parser.parseError(_(u"Unexpected non-space characters. "
+          u"Expected DOCTYPE."))
         self.parser.phase = self.parser.phases["rootElement"]
         self.parser.phase.processCharacters(data)
 
     def processStartTag(self, name, attributes):
-        self.parser.parseError(_("No DOCTYPE seen."))
+        self.parser.parseError(_(u"Unexpected start tag (" + name +\
+          u"). Expected DOCTYPE."))
         self.parser.phase = self.parser.phases["rootElement"]
         self.parser.phase.processStartTag(name, attributes)
 
     def processEndTag(self, name):
-        self.parser.parseError(_("No DOCTYPE seen."))
+        self.parser.parseError(_(u"Unexpected end tag (" + name +\
+          "). Expected DOCTYPE."))
         self.parser.phase = self.parser.phases["rootElement"]
         self.parser.phase.processEndTag(name)
 
@@ -326,6 +337,8 @@ class RootElementPhase(Phase):
         self.parser.phase.processCharacters(data)
 
     def processStartTag(self, name, attributes):
+        if name == "html":
+            self.parser.firstStartTag = True
         self.insertHtmlElement()
         self.parser.phase.processStartTag(name, attributes)
 
@@ -372,7 +385,7 @@ class BeforeHeadPhase(Phase):
 
     def endTagOther(self, name):
         self.parser.parseError(_("Unexpected end tag (" + name +\
-          ") after the root element."))
+          ") after the (implied) root element."))
 
 class InHeadPhase(Phase):
     def __init__(self, parser, tree):
@@ -380,7 +393,8 @@ class InHeadPhase(Phase):
 
         self.startTagHandler =  utils.MethodDispatcher([
             ("html", self.startTagHtml),
-            (("title", "style"), self.startTagTitleStyle),
+            ("title", self.startTagTitle),
+            ("style", self.startTagStyle),
             ("script", self.startTagScript),
             (("base", "link", "meta"), self.startTagBaseLinkMeta),
             ("head", self.startTagHead)
@@ -405,6 +419,8 @@ class InHeadPhase(Phase):
     # the real thing
     def processEOF(self):
         if self.tree.openElements[-1].name in ("title", "style", "script"):
+            self.parser.parseError(_(u"Unexpected end of file. "
+              u"Expected end tag (" + self.tree.openElements[-1].name + ")."))
             self.tree.openElements.pop()
         self.anythingElse()
         self.parser.phase.processEOF()
@@ -421,25 +437,31 @@ class InHeadPhase(Phase):
         self.tree.headPointer = self.tree.openElements[-1]
         self.parser.phase = self.parser.phases["inHead"]
 
-    def startTagTitleStyle(self, name, attributes):
-        cmFlags = {"title":"RCDATA", "style":"CDATA"}
+    def startTagTitle(self, name, attributes):
         element = self.tree.createElement(name, attributes)
         self.appendToHead(element)
         self.tree.openElements.append(element)
-        self.parser.tokenizer.contentModelFlag =\
-          contentModelFlags[cmFlags[name]]
+        self.parser.tokenizer.contentModelFlag = contentModelFlags["RCDATA"]
+
+    def startTagStyle(self, name, attributes):
+        element = self.tree.createElement(name, attributes)
+        if self.tree.headPointer is not None and\
+          self.parser.phase == self.parser.phases["inHead"]:
+            self.appendToHead(element)
+        else:
+            self.tree.openElements[-1].appendChild(element)
+        self.tree.openElements.append(element)
+        self.parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]
 
     def startTagScript(self, name, attributes):
         element = self.tree.createElement(name, attributes)
         element._flags.append("parser-inserted")
-
-        # XXX in theory we should check if we're actually in the InHead state
-        # here and if the headElementPointer is not zero but it seems to work
-        # without that being the case.
-        self.tree.openElements[-1].appendChild(element)
+        if self.tree.headPointer is not None and\
+          self.parser.phase == self.parser.phases["inHead"]:
+            self.appendToHead(element)
+        else:
+            self.tree.openElements[-1].appendChild(element)
         self.tree.openElements.append(element)
-
-        # XXX AT we could use self.tree.insertElement(name, attributes) ...
         self.parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]
 
     def startTagBaseLinkMeta(self, name, attributes):
@@ -454,7 +476,7 @@ class InHeadPhase(Phase):
         if self.tree.openElements[-1].name == "head":
             self.tree.openElements.pop()
         else:
-            self.parser.parseError()
+            self.parser.parseError(_(u"Unexpected end tag (head). Ignored."))
         self.parser.phase = self.parser.phases["afterHead"]
 
     def endTagHtml(self, name):
@@ -465,11 +487,12 @@ class InHeadPhase(Phase):
         if self.tree.openElements[-1].name == name:
             self.tree.openElements.pop()
         else:
-            self.parser.parseError(_("Unexpected end tag " + name +\
-              ". Ignored."))
+            self.parser.parseError(_(u"Unexpected end tag (" + name +\
+              "). Ignored."))
 
     def endTagOther(self, name):
-        self.parser.parseError(_("Unexpected end tag " + name + ". Ignored."))
+        self.parser.parseError(_(u"Unexpected end tag (" + name +\
+          "). Ignored."))
 
     def anythingElse(self):
         if self.tree.openElements[-1].name == "head":
@@ -507,7 +530,8 @@ class AfterHeadPhase(Phase):
         self.parser.phase = self.parser.phases["inFrameset"]
 
     def startTagFromHead(self, name, attributes):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected start tag (" + name +\
+          ") that can be in head. Moved."))
         self.parser.phase = self.parser.phases["inHead"]
         self.parser.phase.processStartTag(name, attributes)
 
@@ -531,8 +555,8 @@ class InBodyPhase(Phase):
         Phase.__init__(self, parser, tree)
         self.startTagHandler = utils.MethodDispatcher([
             ("html", self.startTagHtml),
-            ("script", self.startTagScript),
-            (("base", "link", "meta", "style", "title"),
+            (("script", "style"), self.startTagScriptStyle),
+            (("base", "link", "meta", "title"),
               self.startTagFromHead),
             ("body", self.startTagBody),
             (("address", "blockquote", "center", "dir", "div", "dl",
@@ -578,11 +602,12 @@ class InBodyPhase(Phase):
             (("a", "b", "big", "em", "font", "i", "nobr", "s", "small",
               "strike", "strong", "tt", "u"), self.endTagFormatting),
             (("marquee", "object", "button"), self.endTagButtonMarqueeObject),
-            (("caption", "col", "colgroup", "frame", "frameset", "head",
-              "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
-              "tr", "area", "basefont", "bgsound", "br", "embed", "hr",
-              "image", "img", "input", "isindex", "param", "select", "spacer",
-              "table",  "wbr"),self.endTagMisplacedNone),
+            (("head", "frameset", "select", "optgroup", "option", "table",
+              "caption", "colgroup", "col", "thead", "tfoot", "tbody", "tr",
+              "td", "th"), self.endTagMisplaced),
+            (("area", "basefont", "bgsound", "br", "embed", "hr", "image",
+              "img", "input", "isindex", "param", "spacer", "wbr", "frame"),
+              self.endTagNone),
             (("noframes", "noscript", "noembed", "textarea", "xmp", "iframe"),
               self.endTagCdataTextAreaXmp),
             (("event-source", "section", "nav", "article", "aside", "header",
@@ -604,16 +629,16 @@ class InBodyPhase(Phase):
         self.tree.reconstructActiveFormattingElements()
         self.tree.insertText(data)
 
-    def startTagScript(self, name, attributes):
+    def startTagScriptStyle(self, name, attributes):
         self.parser.phases["inHead"].processStartTag(name, attributes)
 
     def startTagFromHead(self, name, attributes):
-        self.parser.parseError(_("Unexpected start tag " + name +\
-          " that belongs in the head. Moved."))
+        self.parser.parseError(_(u"Unexpected start tag (" + name +\
+          ") that belongs in the head. Moved."))
         self.parser.phases["inHead"].processStartTag(name, attributes)
 
     def startTagBody(self, name, attributes):
-        self.parser.parseError(_("Unexpected start tag body"))
+        self.parser.parseError(_(u"Unexpected start tag (body)."))
         if len(self.tree.openElements) == 1 \
           or self.tree.openElements[1].name != "body":
             assert self.parser.innerHTML
@@ -629,7 +654,7 @@ class InBodyPhase(Phase):
 
     def startTagForm(self, name, attributes):
         if self.tree.formPointer:
-            self.parser.parseError()
+            self.parser.parseError(_("Unexpected start tag (form). Ignored."))
         else:
             if self.tree.elementInScope("p"):
                 self.endTagP("p")
@@ -667,7 +692,8 @@ class InBodyPhase(Phase):
             self.endTagP("p")
         for item in headingElements:
             if self.tree.elementInScope(item):
-                self.parser.parseError()
+                self.parser.parseError(_("Unexpected start tag (" + name +\
+                  ")."))
                 item = self.tree.openElements.pop()
                 while item.name not in headingElements:
                     item = self.tree.openElements.pop()
@@ -677,7 +703,8 @@ class InBodyPhase(Phase):
     def startTagA(self, name, attributes):
         afeAElement = self.tree.elementInActiveFormattingElements("a")
         if afeAElement:
-            self.parser.parseError()
+            self.parser.parseError(_(u"Unexpected start tag (a) implies "
+              "end tag (a)."))
             self.endTagFormatting("a")
             if afeAElement in self.tree.openElements:
                 self.tree.openElements.remove(afeAElement)
@@ -692,8 +719,8 @@ class InBodyPhase(Phase):
 
     def startTagButton(self, name, attributes):
         if self.tree.elementInScope("button"):
-            self.parser.parseError(_("Unexpected start tag button. Implying"
-              "button end tag."))
+            self.parser.parseError(_("Unexpected start tag (button) implied "
+              "end tag (button)."))
             self.processEndTag("button")
             self.parser.phase.processStartTag(name, attributes)
         else:
@@ -730,8 +757,8 @@ class InBodyPhase(Phase):
 
     def startTagImage(self, name, attributes):
         # No really...
-        self.parser.parseError(_("Unexpected start tag image. Use img "
-          "instead"))
+        self.parser.parseError(_(u"Unexpected start tag (image). Treated "
+          u"as img."))
         self.processStartTag("img", attributes)
 
     def startTagInput(self, name, attributes):
@@ -783,7 +810,8 @@ class InBodyPhase(Phase):
         "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
         "tr", "noscript"
         """
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected start tag (" + name +\
+          u"). Ignored."))
 
     def startTagNew(self, name, other):
         """New HTML5 elements, "event-source", "section", "nav",
@@ -798,7 +826,7 @@ class InBodyPhase(Phase):
     def endTagP(self, name):
         self.tree.generateImpliedEndTags("p")
         if self.tree.openElements[-1].name != "p":
-            self.parser.parseError()
+            self.parser.parseError(_("Unexpected end tag (p)."))
         while self.tree.elementInScope("p"):
             self.tree.openElements.pop()
 
@@ -811,7 +839,8 @@ class InBodyPhase(Phase):
             self.parser.parseError()
             return
         if self.tree.openElements[-1].name != "body":
-            self.parser.parseError()
+            self.parser.parseError(_("Unexpected end tag (body). Missing "
+              u"end tag (" + self.tree.openElements[-1].name + ")."))
         self.parser.phase = self.parser.phases["afterBody"]
 
     def endTagHtml(self, name):
@@ -824,7 +853,8 @@ class InBodyPhase(Phase):
         if inScope:
             self.tree.generateImpliedEndTags()
         if self.tree.openElements[-1].name != name:
-             self.parser.parseError()
+             self.parser.parseError(_(u"End tag (" + name + ") seen too "
+               u"early. Expected other end tag."))
         if inScope:
             node = self.tree.openElements.pop()
             while node.name != name:
@@ -839,7 +869,8 @@ class InBodyPhase(Phase):
         if self.tree.elementInScope(name):
             self.tree.generateImpliedEndTags(name)
             if self.tree.openElements[-1].name != name:
-                self.parser.parseError()
+                self.parser.parseError(_(u"End tag (" + name + ") seen too "
+                  u"early. Expected other end tag."))
 
         if self.tree.elementInScope(name):
             node = self.tree.openElements.pop()
@@ -852,7 +883,8 @@ class InBodyPhase(Phase):
                 self.tree.generateImpliedEndTags()
                 break
         if self.tree.openElements[-1].name != name:
-            self.parser.parseError()
+            self.parser.parseError(_(u"Unexpected end tag (" + name + "). "
+                  u"Expected other end tag."))
 
         for item in headingElements:
             if self.tree.elementInScope(item):
@@ -864,23 +896,28 @@ class InBodyPhase(Phase):
     def endTagFormatting(self, name):
         """The much-feared adoption agency algorithm
         """
+        # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
+        # XXX Better parseError messages appreciated.
         while True:
             # Step 1 paragraph 1
             afeElement = self.tree.elementInActiveFormattingElements(name)
             if not afeElement or (afeElement in self.tree.openElements and
               not self.tree.elementInScope(afeElement.name)):
-                self.parser.parseError()
+                self.parser.parseError(_(u"End tag (" + name + ") violates "
+                  u" step 1, paragraph 1 of the adoption agency algorithm."))
                 return
 
             # Step 1 paragraph 2
             elif afeElement not in self.tree.openElements:
-                self.parser.parseError()
+                self.parser.parseError(_(u"End tag (" + name + ") violates "
+                  u" step 1, paragraph 2 of the adoption agency algorithm."))
                 self.tree.activeFormattingElements.remove(afeElement)
                 return
 
             # Step 1 paragraph 3
             if afeElement != self.tree.openElements[-1]:
-                self.parser.parseError()
+                self.parser.parseError(_(u"End tag (" + name + ") violates "
+                  u" step 1, paragraph 3 of the adoption agency algorithm."))
 
             # Step 2
             # Start of the adoption agency algorithm proper
@@ -979,7 +1016,8 @@ class InBodyPhase(Phase):
         if self.tree.elementInScope(name):
             self.tree.generateImpliedEndTags()
         if self.tree.openElements[-1].name != name:
-            self.parser.parseError()
+            self.parser.parseError(_(u"Unexpected end tag (" + name +\
+              "). Expected other end tag first."))
 
         if self.tree.elementInScope(name):
             element = self.tree.openElements.pop()
@@ -987,24 +1025,21 @@ class InBodyPhase(Phase):
                 element = self.tree.openElements.pop()
             self.tree.clearActiveFormattingElements()
 
-    def endTagMisplacedNone(self, name):
-        """ Elements that should be children of other elements that have a
-        different insertion mode or elements that have no end tag;
-        here they are ignored
-        "caption", "col", "colgroup", "frame", "frameset", "head",
-        "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
-        "tr", "noscript, "area", "basefont", "bgsound", "br", "embed",
-        "hr", "iframe", "image", "img", "input", "isindex", "noembed",
-        "noframes", "param", "select", "spacer", "table", "textarea", "wbr""
-        """
-        self.parser.parseError()
+    def endTagMisplaced(self, name):
+        # This handles elements with end tags in other insertion modes.
+        self.parser.parseError(_(u"Unexpected end tag (" + name +\
+          u"). Ignored."))
+
+    def endTagNone(self, name):
+        # This handles elements with no end tag.
+        self.parser.parseError(_(u"This tag (" + name + u") has no end tag"))
 
     def endTagCdataTextAreaXmp(self, name):
         if self.tree.openElements[-1].name == name:
             self.tree.openElements.pop()
         else:
-            self.parser.parseError(_("Unexpected end tag " + name +\
-              ". Ignored."))
+            self.parser.parseError(_("Unexpected end tag (" + name +\
+              "). Ignored."))
 
     def endTagNew(self, name):
         """New HTML5 elements, "event-source", "section", "nav",
@@ -1019,14 +1054,15 @@ class InBodyPhase(Phase):
             if node.name == name:
                 self.tree.generateImpliedEndTags()
                 if self.tree.openElements[-1].name != name:
-                    self.parser.parseError(_("Unexpected end tag " + name +\
-                      "."))
+                    self.parser.parseError(_("Unexpected end tag (" + name +\
+                      ")."))
                 while self.tree.openElements.pop() != node:
                     pass
                 break
             else:
                 if node.name in specialElements | scopingElements:
-                    self.parser.parseError()
+                    self.parser.parseError(_(u"Unexpected end tag (" + name +\
+                      "). Ignored."))
                     break
 
 class InTablePhase(Phase):
@@ -1055,13 +1091,15 @@ class InTablePhase(Phase):
     def clearStackToTableContext(self):
         # "clear the stack back to a table context"
         while self.tree.openElements[-1].name not in ("table", "html"):
+            self.parser.parseError(_(u"Unexpected implied end tag (" +\
+              self.tree.openElements[-1].name + u") in the table phase."))
             self.tree.openElements.pop()
-            self.parser.parseError()
         # When the current node is <html> it's an innerHTML case
 
     # processing methods
     def processCharacters(self, data):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected non-space characters in "
+          u"table context caused voodoo mode."))
         # Make all the special element rearranging voodoo kick in
         self.tree.insertFromTable = True
         # Process the character in the "in body" mode
@@ -1099,7 +1137,8 @@ class InTablePhase(Phase):
             self.parser.phase.processStartTag(name, attributes)
 
     def startTagOther(self, name, attributes):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected start tag (" + name + u") in "
+          u"table context caused voodoo mode."))
         # Make all the special element rearranging voodoo kick in
         self.tree.insertFromTable = True
         # Process the start tag in the "in body" mode
@@ -1109,7 +1148,7 @@ class InTablePhase(Phase):
     def endTagTable(self, name):
         if self.tree.elementInScope("table", True):
             self.tree.generateImpliedEndTags()
-            if self.tree.openElements[-1].name == "table":
+            if self.tree.openElements[-1].name != "table":
                 self.parser.parseError()
             while self.tree.openElements[-1].name != "table":
                 self.tree.openElements.pop()
@@ -1120,9 +1159,12 @@ class InTablePhase(Phase):
             # innerHTML case
 
     def endTagIgnore(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_("Unexpected end tag (" + name +\
+          "). Ignored."))
 
     def endTagOther(self, name):
+        self.parser.parseError(_(u"Unexpected end tag (" + name + u") in "
+          u"table context caused voodoo mode."))
         # Make all the special element rearranging voodoo kick in
         self.parser.insertFromTable = True
         # Process the end tag in the "in body" mode
@@ -1169,10 +1211,12 @@ class InCaptionPhase(Phase):
         if self.tree.elementInScope(name, True):
             # AT this code is quite similar to endTagTable in "InTable"
             self.tree.generateImpliedEndTags()
-            if self.tree.openElements[-1].name == "caption":
-                self.parser.parseError()
+            if self.tree.openElements[-1].name != "caption":
+                self.parser.parseError(_(u"Unexpected end tag (caption). "
+                  u"Missing end tags."))
             while self.tree.openElements[-1].name != "caption":
                 self.tree.openElements.pop()
+            self.tree.openElements.pop()
             self.tree.clearActiveFormattingElements()
             self.parser.phase = self.parser.phases["inTable"]
         else:
@@ -1187,7 +1231,8 @@ class InCaptionPhase(Phase):
             self.parser.phase.processStartTag(name, attributes)
 
     def endTagIgnore(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_("Unexpected end tag (" + name +\
+          "). Ignored."))
 
     def endTagOther(self, name):
         self.parser.phases["inBody"].processEndTag(name)
@@ -1236,7 +1281,8 @@ class InColumnGroupPhase(Phase):
             self.parser.phase = self.parser.phases["inTable"]
 
     def endTagCol(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected end tag (col). "
+          u"col has no end tag."))
 
     def endTagOther(self, name):
         self.endTagColgroup("colgroup")
@@ -1269,8 +1315,9 @@ class InTableBodyPhase(Phase):
     def clearStackToTableBodyContext(self):
         while self.tree.openElements[-1].name not in ("tbody", "tfoot",
           "thead", "html"):
+            self.parser.parseError(_(u"Unexpected implied end tag (" +\
+              self.tree.openElements[-1].name + u") in the table body phase."))
             self.tree.openElements.pop()
-            self.parser.parseError()
 
     # the rest
     def processCharacters(self,data):
@@ -1282,7 +1329,8 @@ class InTableBodyPhase(Phase):
         self.parser.phase = self.parser.phases["inRow"]
 
     def startTagTableCell(self, name, attributes):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected table cell start tag (" +\
+          name + u") in the table body phase."))
         self.startTagTr("tr", {})
         self.parser.phase.processStartTag(name, attributes)
 
@@ -1307,7 +1355,8 @@ class InTableBodyPhase(Phase):
             self.tree.openElements.pop()
             self.parser.phase = self.parser.phases["inTable"]
         else:
-            self.parser.parseError()
+            self.parser.parseError(_("Unexpected end tag (" + name +\
+              ") in the table body phase. Ignored."))
 
     def endTagTable(self, name):
         if self.tree.elementInScope("tbody", True) or \
@@ -1321,7 +1370,8 @@ class InTableBodyPhase(Phase):
             self.parser.parseError()
 
     def endTagIgnore(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_("Unexpected end tag (" + name +\
+          ") in the table body phase. Ignored."))
 
     def endTagOther(self, name):
         self.parser.phases["inTable"].processEndTag(name)
@@ -1351,8 +1401,9 @@ class InRowPhase(Phase):
     # helper methods (XXX unify this with other table helper methods)
     def clearStackToTableRowContext(self):
         while self.tree.openElements[-1].name not in ("tr", "html"):
+            self.parser.parseError(_(u"Unexpected implied end tag (" +\
+              self.tree.openElements[-1].name + u") in the row phase."))
             self.tree.openElements.pop()
-            self.parser.parseError()
 
     # the rest
     def processCharacters(self, data):
@@ -1398,7 +1449,8 @@ class InRowPhase(Phase):
             self.parser.parseError()
 
     def endTagIgnore(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_("Unexpected end tag (" + name +\
+          u") in the row phase. Ignored."))
 
     def endTagOther(self, name):
         self.parser.phases["inTable"].processEndTag(name)
@@ -1452,7 +1504,8 @@ class InCellPhase(Phase):
         if self.tree.elementInScope(name, True):
             self.tree.generateImpliedEndTags(name)
             if self.tree.openElements[-1].name != name:
-                self.parser.parseError()
+                self.parser.parseError("Got table cell end tag (" + name +\
+                  ") while required end tags are missing.")
                 while True:
                     node = self.tree.openElements.pop()
                     if node.name == name:
@@ -1462,10 +1515,12 @@ class InCellPhase(Phase):
             self.tree.clearActiveFormattingElements()
             self.parser.phase = self.parser.phases["inRow"]
         else:
-            self.parser.parseError()
+            self.parser.parseError(_("Unexpected end tag (" + name +\
+              "). Ignored."))
 
     def endTagIgnore(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_("Unexpected end tag (" + name +\
+          "). Ignored."))
 
     def endTagImply(self, name):
         if self.tree.elementInScope(name, True):
@@ -1492,7 +1547,7 @@ class InSelectPhase(Phase):
             ("optgroup", self.startTagOptgroup),
             ("select", self.startTagSelect)
         ])
-        self.startTagHandler.default = self.processAnythingElse
+        self.startTagHandler.default = self.startTagOther
 
         self.endTagHandler = utils.MethodDispatcher([
             ("option", self.endTagOption),
@@ -1501,7 +1556,7 @@ class InSelectPhase(Phase):
             (("caption", "table", "tbody", "tfoot", "thead", "tr", "td",
               "th"), self.endTagTableElements)
         ])
-        self.endTagHandler.default = self.processAnythingElse
+        self.endTagHandler.default = self.endTagOther
 
     # http://www.whatwg.org/specs/web-apps/current-work/#in-select
     def processCharacters(self, data):
@@ -1521,14 +1576,20 @@ class InSelectPhase(Phase):
         self.tree.insertElement(name, attributes)
 
     def startTagSelect(self, name, attributes):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected start tag (select) in the "
+          u"select phase implies select start tag."))
         self.endTagSelect("select")
 
+    def startTagOther(self, name, attributes):
+        self.parser.parseError(_(u"Unexpected start tag token (" + name +\
+          u") in the select phase. Ignored."))
+
     def endTagOption(self, name):
         if self.tree.openElements[-1].name == "option":
             self.tree.openElements.pop()
         else:
-            self.parser.parseError()
+            self.parser.parseError(_(u"Unexpected end tag (option) in the "
+              u"select phase. Ignored."))
 
     def endTagOptgroup(self, name):
         # </optgroup> implicitly closes <option>
@@ -1540,7 +1601,8 @@ class InSelectPhase(Phase):
             self.tree.openElements.pop()
         # But nothing else
         else:
-            self.parser.parseError()
+            self.parser.parseError(_(u"Unexpected end tag (optgroup) in the "
+              u"select phase. Ignored."))
 
     def endTagSelect(self, name):
         if self.tree.elementInScope(name, True):
@@ -1553,13 +1615,15 @@ class InSelectPhase(Phase):
             self.parser.parseError()
 
     def endTagTableElements(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected table end tag (" + name +\
+          ") in the select phase."))
         if self.tree.elementInScope(name, True):
             self.endTagSelect()
             self.parser.phase.processEndTag(name)
 
-    def processAnythingElse(self, name, attributes={}):
-        self.parser.parseError()
+    def endTagOther(self, name):
+        self.parser.parseError(_(u"Unexpected end tag token (" + name +\
+          u") in the select phase. Ignored."))
 
 
 class AfterBodyPhase(Phase):
@@ -1576,12 +1640,14 @@ class AfterBodyPhase(Phase):
         self.tree.insertComment(data, self.tree.openElements[0])
 
     def processCharacters(self, data):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected non-space characters in the "
+          u"after body phase."))
         self.parser.phase = self.parser.phases["inBody"]
         self.parser.phase.processCharacters(data)
 
     def processStartTag(self, name, attributes):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected start tag token (" + name +\
+          u") in the after body phase."))
         self.parser.phase = self.parser.phases["inBody"]
         self.parser.phase.processStartTag(name, attributes)
 
@@ -1589,11 +1655,17 @@ class AfterBodyPhase(Phase):
         if self.parser.innerHTML:
             self.parser.parseError()
         else:
+            # XXX: This may need to be done, not sure:
+            # Don't set lastPhase to the current phase but to the inBody phase
+            # instead. No need for extra parse errors if there's something
+            # after </html>.
+            # Try "<!doctype html>X</html>X" for instance.
             self.parser.lastPhase = self.parser.phase
             self.parser.phase = self.parser.phases["trailingEnd"]
 
     def endTagOther(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected end tag token (" + name +\
+          u") in the after body phase."))
         self.parser.phase = self.parser.phases["inBody"]
         self.parser.phase.processEndTag(name)
 
@@ -1617,8 +1689,8 @@ class InFramesetPhase(Phase):
         self.endTagHandler.default = self.endTagOther
 
     def processCharacters(self, data):
-        self.parser.parseError(_("Unepxected characters in the frameset phase. "
-          "Characters ignored."))
+        self.parser.parseError(_(u"Unexpected characters in "
+          u"the frameset phase. Characters ignored."))
 
     def startTagFrameset(self, name, attributes):
         self.tree.insertElement(name, attributes)
@@ -1631,14 +1703,14 @@ class InFramesetPhase(Phase):
         self.parser.phases["inBody"].processStartTag(name, attributes)
 
     def startTagOther(self, name, attributes):
-        self.parser.parseError(_("Unexpected start tag token (" + name +\
-          ") in the frameset phase."))
+        self.parser.parseError(_(u"Unexpected start tag token (" + name +\
+          u") in the frameset phase. Ignored."))
 
     def endTagFrameset(self, name):
         if self.tree.openElements[-1].name == "html":
             # innerHTML case
-            self.parser.parseError(_("Unexpected end tag token (frameset) in the"
-              "frameset phase (innerHTML)"))
+            self.parser.parseError(_(u"Unexpected end tag token (frameset) "
+              u"in the frameset phase (innerHTML)."))
         else:
             self.tree.openElements.pop()
         if not self.parser.innerHTML and\
@@ -1651,8 +1723,8 @@ class InFramesetPhase(Phase):
         self.parser.phases["inBody"].processEndTag(name)
 
     def endTagOther(self, name):
-        self.parser.parseError(_("Unexpected end tag token (" + name +
-          ") in the frameset phase."))
+        self.parser.parseError(_(u"Unexpected end tag token (" + name +
+          u") in the frameset phase. Ignored."))
 
 
 class AfterFramesetPhase(Phase):
@@ -1672,20 +1744,23 @@ class AfterFramesetPhase(Phase):
         self.endTagHandler.default = self.endTagOther
 
     def processCharacters(self, data):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected non-space characters in the "
+          u"after frameset phase. Ignored."))
 
     def startTagNoframes(self, name, attributes):
         self.parser.phases["inBody"].processStartTag(name, attributes)
 
     def startTagOther(self, name, attributes):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected start tag (" + name +\
+          u") in the after frameset phase. Ignored."))
 
     def endTagHtml(self, name):
         self.parser.lastPhase = self.parser.phase
         self.parser.phase = self.parser.phases["trailingEnd"]
 
     def endTagOther(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected end tag (" + name +\
+          u") in the after frameset phase. Ignored."))
 
 
 class TrailingEndPhase(Phase):
@@ -1696,20 +1771,23 @@ class TrailingEndPhase(Phase):
         self.parser.insertCommenr(data, self.tree.document)
 
     def processSpaceCharacters(self, data):
-        self.parser.lastPhase.processCharacters(data)
+        self.parser.lastPhase.processSpaceCharacters(data)
 
     def processCharacters(self, data):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected non-space characters. "
+          u"Expected end of file."))
         self.parser.phase = self.parser.lastPhase
         self.parser.phase.processCharacters(data)
 
     def processStartTag(self, name, attributes):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected start tag (" + name +\
+          u"). Expected end of file."))
         self.parser.phase = self.parser.lastPhase
         self.parser.phase.processStartTag(name, attributes)
 
     def processEndTag(self, name):
-        self.parser.parseError()
+        self.parser.parseError(_(u"Unexpected end tag (" + name +\
+          u"). Expected end of file."))
         self.parser.phase = self.parser.lastPhase
         self.parser.phase.processEndTag(name)
 
diff --git a/planet/html5lib/liberalxmlparser.py b/planet/html5lib/liberalxmlparser.py
index 93bbee9..a3d98cf 100644
--- a/planet/html5lib/liberalxmlparser.py
+++ b/planet/html5lib/liberalxmlparser.py
@@ -11,30 +11,25 @@ References:
  * http://wiki.whatwg.org/wiki/HtmlVsXhtml
 
 @@TODO:
- * Build a Treebuilder that produces Python DOM objects:
-     http://docs.python.org/lib/module-xml.dom.html
  * Produce SAX events based on the produced DOM.  This is intended not to
    support streaming, but rather to support application level compatibility. 
  * Optional namespace support
- * Special case the output of XHTML <script> elements so that the empty
-   element syntax is never used, even when the src attribute is provided.
-   Also investigate the use of <![CDATA[]>> when tokenizer.contentModelFlag
+ * Investigate the use of <![CDATA[]]> when tokenizer.contentModelFlag
    indicates CDATA processsing to ensure dual HTML/XHTML compatibility.
- * Map illegal XML characters to U+FFFD, possibly with additional markup in
-   the case of XHTML
  * Selectively lowercase only XHTML, but not foreign markup
 """
 
 import html5parser
+from constants import voidElements
 import gettext
 _ = gettext.gettext
 
-class XHTMLParser(html5parser.HTMLParser):
-    """ liberal XMTHML parser """
+class XMLParser(html5parser.HTMLParser):
+    """ liberal XML parser """
 
     def __init__(self, *args, **kwargs):
         html5parser.HTMLParser.__init__(self, *args, **kwargs)
-        self.phases["rootElement"] = XhmlRootPhase(self, self.tree)
+        self.phases["initial"] = XmlRootPhase(self, self.tree)
 
     def normalizeToken(self, token):
         if token["type"] == "StartTag" or token["type"] == "EmptyTag":
@@ -51,6 +46,35 @@ class XHTMLParser(html5parser.HTMLParser):
                 token["data"] = {}
                 token["type"] = "EndTag"
 
+        elif token["type"] == "EndTag":
+            if token["data"]:
+               self.parseError(_("End tag contains unexpected attributes."))
+
+        return token
+
+class XHTMLParser(XMLParser):
+    """ liberal XHTML parser """
+
+    def __init__(self, *args, **kwargs):
+        html5parser.HTMLParser.__init__(self, *args, **kwargs)
+        self.phases["rootElement"] = XhmlRootPhase(self, self.tree)
+
+    def normalizeToken(self, token):
+        token = XMLParser.normalizeToken(self, token)
+
+        # ensure that non-void XHTML elements have content so that separate
+        # open and close tags are emitted
+        if token["type"]  == "EndTag" and \
+            token["name"] not in voidElements and \
+            token["name"] == self.tree.openElements[-1].name and \
+            not self.tree.openElements[-1].hasContent():
+            for e in self.tree.openElements:
+                if 'xmlns' in e.attributes.keys():
+                    if e.attributes['xmlns'] <> 'http://www.w3.org/1999/xhtml':
+                        break
+            else:
+                self.tree.insertText('')
+
         return token
 
 class XhmlRootPhase(html5parser.RootElementPhase):
@@ -60,13 +84,6 @@ class XhmlRootPhase(html5parser.RootElementPhase):
         self.tree.document.appendChild(element)
         self.parser.phase = self.parser.phases["beforeHead"]
 
-class XMLParser(XHTMLParser):
-    """ liberal XML parser """
-
-    def __init__(self, *args, **kwargs):
-        XHTMLParser.__init__(self, *args, **kwargs)
-        self.phases["initial"] = XmlRootPhase(self, self.tree)
-
 class XmlRootPhase(html5parser.Phase):
     """ Prime the Xml parser """
     def __getattr__(self, name):
diff --git a/planet/html5lib/tokenizer.py b/planet/html5lib/tokenizer.py
index 85e0a0d..3f4db08 100644
--- a/planet/html5lib/tokenizer.py
+++ b/planet/html5lib/tokenizer.py
@@ -110,6 +110,9 @@ class HTMLTokenizer(object):
         If not present self.tokenQueue.append({"type": "ParseError"}) is invoked.
         """
 
+        # XXX More needs to be done here. For instance, #13 should probably be
+        # converted to #10 so we don't get \r (#13 is \r, right?) in the DOM
+        # and such. Thoughts on this appreciated.
         allowed = digits
         radix = 10
         if isHex:
@@ -227,7 +230,7 @@ class HTMLTokenizer(object):
                 # discarded or needs to be put back.
                 if not charStack[-1] == ";":
                     self.tokenQueue.append({"type": "ParseError", "data":
-                      _("Named entity did not  ';'.")})
+                      _("Named entity didn't end with ';'.")})
                     self.stream.queue.extend(charStack[entityLength:])
             else:
                 self.tokenQueue.append({"type": "ParseError", "data":
@@ -245,50 +248,15 @@ class HTMLTokenizer(object):
             self.currentToken["data"][-1][1] += u"&"
 
     def emitCurrentToken(self):
-        """This method is a generic handler for emitting the StartTag,
-        EndTag, Comment and Doctype. It also sets the state to
-        "data" because that's what's needed after a token has been emitted.
+        """This method is a generic handler for emitting the tags. It also sets
+        the state to "data" because that's what's needed after a token has been
+        emitted.
         """
 
-        # Although isinstance() is http://www.canonical.org/~kragen/isinstance/
-        # considered harmful it should be ok here given that the classes are for
-        # internal usage.
-
-        token = self.currentToken
-
-        # If an end tag has attributes it's a parse error and they should
-        # be removed
-        if token["type"] == "EndTag" and token["data"]:
-            self.tokenQueue.append({"type": "ParseError", "data":
-              _("End tag contains unexpected attributes.")})
-            token["data"] = {}
-
         # Add token to the queue to be yielded
-        self.tokenQueue.append(token)
+        self.tokenQueue.append(self.currentToken)
         self.state = self.states["data"]
 
-    def emitCurrentTokenWithParseError(self, data=None):
-        # XXX if we want useful error messages we need to inline this method
-        """This method is equivalent to emitCurrentToken (well, it invokes it)
-        except that it also puts "data" back on the characters queue if a data
-        argument is provided and it throws a parse error."""
-        if data:
-            self.stream.queue.append(data)
-        self.tokenQueue.append({"type": "ParseError", "data":
-          _("XXX Something is wrong with the emitted token.")})
-        self.emitCurrentToken()
-
-    def attributeValueQuotedStateHandler(self, quoteType):
-        data = self.stream.char()
-        if data == quoteType:
-            self.state = self.states["beforeAttributeName"]
-        elif data == u"&":
-            self.processEntityInAttribute()
-        elif data == EOF:
-            self.emitCurrentTokenWithParseError(data)
-        else:
-            self.currentToken["data"][-1][1] += data + self.stream.charsUntil(\
-              (quoteType, u"&"))
 
     # Below are the various tokenizer states worked out.
 
@@ -351,14 +319,14 @@ class HTMLTokenizer(object):
                 # XXX In theory it could be something besides a tag name. But
                 # do we really care?
                 self.tokenQueue.append({"type": "ParseError", "data":
-                  _("Expected tag name. Got '?' instead (HTML doesn't support processing instructions).")})
+                  _("Expected tag name. Got '?' instead (HTML doesn't "
+                  "support processing instructions).")})
                 self.stream.queue.append(data)
                 self.state = self.states["bogusComment"]
             else:
                 # XXX
                 self.tokenQueue.append({"type": "ParseError", "data":
                   _("Expected tag name. Got something else instead")})
-                # XXX can't we do "<" + data here?
                 self.tokenQueue.append({"type": "Characters", "data": u"<"})
                 self.stream.queue.append(data)
                 self.state = self.states["data"]
@@ -427,7 +395,7 @@ class HTMLTokenizer(object):
                 self.tokenQueue.append({"type": "Characters", "data": u"</"})
                 self.state = self.states["data"]
             else:
-                # XXX data can be '...
+                # XXX data can be _'_...
                 self.tokenQueue.append({"type": "ParseError", "data":
                   _("Expected closing tag. Unexpected character '" + data + "' found.")})
                 self.stream.queue.append(data)
@@ -443,8 +411,15 @@ class HTMLTokenizer(object):
               self.stream.charsUntil(asciiLetters, True)
         elif data == u">":
             self.emitCurrentToken()
-        elif data == u"<" or data == EOF:
-            self.emitCurrentTokenWithParseError(data)
+        elif data == u"<":
+            self.stream.queue.append(data)
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected < character when getting the tag name.")})
+            self.emitCurrentToken()
+        elif data == EOF:
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in the tag name.")})
+            self.emitCurrentToken()
         elif data == u"/":
             self.processSolidusInTag()
             self.state = self.states["beforeAttributeName"]
@@ -463,8 +438,15 @@ class HTMLTokenizer(object):
             self.emitCurrentToken()
         elif data == u"/":
             self.processSolidusInTag()
-        elif data == u"<" or data == EOF:
-            self.emitCurrentTokenWithParseError(data)
+        elif data == u"<":
+            self.stream.queue.append(data)
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected < character. Expected attribute name instead.")})
+            self.emitCurrentToken()
+        elif data == EOF:
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file. Expected attribute name instead.")})
+            self.emitCurrentToken()
         else:
             self.currentToken["data"].append([data, ""])
             self.state = self.states["attributeName"]
@@ -489,8 +471,16 @@ class HTMLTokenizer(object):
         elif data == u"/":
             self.processSolidusInTag()
             self.state = self.states["beforeAttributeName"]
-        elif data == u"<" or data == EOF:
-            self.emitCurrentTokenWithParseError(data)
+        elif data == u"<":
+            self.stream.queue.append(data)
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected < character in attribute name.")})
+            self.emitCurrentToken()
+            leavingThisState = False
+        elif data == EOF:
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in attribute name.")})
+            self.emitCurrentToken()
             leavingThisState = False
         else:
             self.currentToken["data"][-1][0] += data
@@ -523,8 +513,15 @@ class HTMLTokenizer(object):
         elif data == u"/":
             self.processSolidusInTag()
             self.state = self.states["beforeAttributeName"]
-        elif data == u"<" or data == EOF:
-            self.emitCurrentTokenWithParseError(data)
+        elif data == u"<":
+            self.stream.queue.append(data)
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected < character. Expected = or end of tag.")})
+            self.emitCurrentToken()
+        elif data == EOF:
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file. Expected = or end of tag.")})
+            self.emitCurrentToken()
         else:
             self.currentToken["data"].append([data, ""])
             self.state = self.states["attributeName"]
@@ -543,22 +540,48 @@ class HTMLTokenizer(object):
             self.state = self.states["attributeValueSingleQuoted"]
         elif data == u">":
             self.emitCurrentToken()
-        elif data == u"<" or data == EOF:
-            self.emitCurrentTokenWithParseError(data)
+        elif data == u"<":
+            self.stream.queue.append(data)
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected < character. Expected attribute value.")})
+            self.emitCurrentToken()
+        elif data == EOF:
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file. Expected attribute value.")})
+            self.emitCurrentToken()
         else:
             self.currentToken["data"][-1][1] += data
             self.state = self.states["attributeValueUnQuoted"]
         return True
 
     def attributeValueDoubleQuotedState(self):
-        # AT We could also let self.attributeValueQuotedStateHandler always
-        # return true and then return that directly here. Not sure what is
-        # faster or better...
-        self.attributeValueQuotedStateHandler(u"\"")
+        data = self.stream.char()
+        if data == "\"":
+            self.state = self.states["beforeAttributeName"]
+        elif data == u"&":
+            self.processEntityInAttribute()
+        elif data == EOF:
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in attribute value (\").")})
+            self.emitCurrentToken()
+        else:
+            self.currentToken["data"][-1][1] += data +\
+              self.stream.charsUntil(("\"", u"&"))
         return True
 
     def attributeValueSingleQuotedState(self):
-        self.attributeValueQuotedStateHandler(u"'")
+        data = self.stream.char()
+        if data == "'":
+            self.state = self.states["beforeAttributeName"]
+        elif data == u"&":
+            self.processEntityInAttribute()
+        elif data == EOF:
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in attribute value (').")})
+            self.emitCurrentToken()
+        else:
+            self.currentToken["data"][-1][1] += data +\
+              self.stream.charsUntil(("'", u"&"))
         return True
 
     def attributeValueUnQuotedState(self):
@@ -569,8 +592,15 @@ class HTMLTokenizer(object):
             self.processEntityInAttribute()
         elif data == u">":
             self.emitCurrentToken()
-        elif data == u"<" or data == EOF:
-            self.emitCurrentTokenWithParseError(data)
+        elif data == u"<":
+            self.stream.queue.append(data)
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected < character in attribute value.")})
+            self.emitCurrentToken()
+        elif data == EOF:
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in attribute value.")})
+            self.emitCurrentToken()
         else:
             self.currentToken["data"][-1][1] += data + self.stream.charsUntil( \
               frozenset(("&", ">","<")) | spaceCharacters)
@@ -615,8 +645,10 @@ class HTMLTokenizer(object):
         if data == u"-":
             self.state = self.states["commentDash"]
         elif data == EOF:
-            # XXX EMIT
-            self.emitCurrentTokenWithParseError()
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in comment.")})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         else:
             self.currentToken["data"] += data + self.stream.charsUntil(u"-")
         return True
@@ -626,8 +658,10 @@ class HTMLTokenizer(object):
         if data == u"-":
             self.state = self.states["commentEnd"]
         elif data == EOF:
-            # XXX EMIT
-            self.emitCurrentTokenWithParseError()
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in comment (-)")})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         else:
             self.currentToken["data"] += u"-" + data +\
               self.stream.charsUntil(u"-")
@@ -640,15 +674,17 @@ class HTMLTokenizer(object):
     def commentEndState(self):
         data = self.stream.char()
         if data == u">":
-            # XXX EMIT
-            self.emitCurrentToken()
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         elif data == u"-":
             self.tokenQueue.append({"type": "ParseError", "data":
               _("Unexpected '-' after '--' found in comment.")})
             self.currentToken["data"] += data
         elif data == EOF:
-            # XXX EMIT
-            self.emitCurrentTokenWithParseError()
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in comment (--).")})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         else:
             # XXX
             self.tokenQueue.append({"type": "ParseError", "data":
@@ -678,11 +714,15 @@ class HTMLTokenizer(object):
         elif data == u">":
             # Character needs to be consumed per the specification so don't
             # invoke emitCurrentTokenWithParseError with "data" as argument.
-            # XXX EMIT
-            self.emitCurrentTokenWithParseError()
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected > character. Expected DOCTYPE name.")})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         elif data == EOF:
-            # XXX EMIT
-            self.emitCurrentTokenWithParseError()
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file. Expected DOCTYPE name.")})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         else:
             self.currentToken["name"] = data
             self.state = self.states["doctypeName"]
@@ -698,8 +738,10 @@ class HTMLTokenizer(object):
             self.tokenQueue.append(self.currentToken)
             self.state = self.states["data"]
         elif data == EOF:
-            # XXX EMIT
-            self.emitCurrentTokenWithParseError()
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in DOCTYPE name.")})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         else:
             # We can't just uppercase everything that arrives here. For
             # instance, non-ASCII characters.
@@ -724,7 +766,11 @@ class HTMLTokenizer(object):
         elif data == EOF:
             self.currentToken["data"] = True
             # XXX EMIT
-            self.emitCurrentTokenWithParseError(data)
+            self.stream.queue.append(data)
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in DOCTYPE.")})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         else:
             self.tokenQueue.append({"type": "ParseError", "data":
               _("Expected space or '>'. Got '" + data + "'")})
@@ -739,7 +785,11 @@ class HTMLTokenizer(object):
             self.state = self.states["data"]
         elif data == EOF:
             # XXX EMIT
-            self.emitCurrentTokenWithParseError(data)
+            self.stream.queue.append(data)
+            self.tokenQueue.append({"type": "ParseError", "data":
+              _("Unexpected end of file in bogus doctype.")})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.states["data"]
         else:
             pass
         return True
diff --git a/planet/html5lib/treebuilders/__init__.py b/planet/html5lib/treebuilders/__init__.py
index 6171bd1..9470145 100755
--- a/planet/html5lib/treebuilders/__init__.py
+++ b/planet/html5lib/treebuilders/__init__.py
@@ -33,4 +33,10 @@ the various methods.
 import os.path
 __path__.append(os.path.dirname(__path__[0]))
 
-import dom, etree, simpletree
+import dom
+import simpletree
+
+try:
+    import etree
+except ImportError:
+    pass
diff --git a/planet/html5lib/treebuilders/_base.py b/planet/html5lib/treebuilders/_base.py
index 8cc2af1..c4af003 100755
--- a/planet/html5lib/treebuilders/_base.py
+++ b/planet/html5lib/treebuilders/_base.py
@@ -1,4 +1,10 @@
 from constants import scopingElements, tableInsertModeElements
+try:
+    frozenset
+except NameError:
+    # Import from the sets module for python 2.3
+    from sets import Set as set
+    from sets import ImmutableSet as frozenset
 
 # The scope markers are inserted when entering buttons, object elements,
 # marquees, table cells, and table captions, and are used to prevent formatting
diff --git a/planet/html5lib/treebuilders/dom.py b/planet/html5lib/treebuilders/dom.py
index 7d8b319..39a88f6 100755
--- a/planet/html5lib/treebuilders/dom.py
+++ b/planet/html5lib/treebuilders/dom.py
@@ -14,6 +14,10 @@ class AttrList:
         self.element.setAttribute(name, value)
     def items(self):
         return self.element.attributes.items()
+    def keys(self):
+        return self.element.attributes.keys()
+    def __getitem__(self, name):
+        return self.element.getAttribute(name)
 
 class NodeBuilder(_base.Node):
     def __init__(self, element):
diff --git a/planet/html5lib/treebuilders/simpletree.py b/planet/html5lib/treebuilders/simpletree.py
index d93b656..6b2f09e 100755
--- a/planet/html5lib/treebuilders/simpletree.py
+++ b/planet/html5lib/treebuilders/simpletree.py
@@ -1,4 +1,5 @@
 import _base
+from constants import voidElements
 from xml.sax.saxutils import escape
 
 # Really crappy basic implementation of a DOM-core like thing
@@ -13,6 +14,9 @@ class Node(_base.Node):
     def __unicode__(self):
         return self.name
 
+    def toxml(self):
+        raise NotImplementedError
+
     def __repr__(self):
         return "<%s %s>" % (self.__class__, self.name)
 
@@ -71,18 +75,24 @@ class Document(Node):
     def __unicode__(self):
         return "#document"
 
+    def toxml(self, encoding="utf-8"):
+        result = ""
+        for child in self.childNodes:
+            result += child.toxml()
+        return result.encode(encoding)
+
+    def hilite(self, encoding="utf-8"):
+        result = "<pre>"
+        for child in self.childNodes:
+            result += child.hilite()
+        return result.encode(encoding) + "</pre>"
+    
     def printTree(self):
         tree = unicode(self)
         for child in self.childNodes:
             tree += child.printTree(2)
         return tree
 
-    def toxml(self, encoding="utf=8"):
-        result = ''
-        for child in self.childNodes:
-            result += child.toxml()
-        return result.encode(encoding)
-
 class DocumentType(Node):
     def __init__(self, name):
         Node.__init__(self, name)
@@ -90,6 +100,11 @@ class DocumentType(Node):
     def __unicode__(self):
         return "<!DOCTYPE %s>" % self.name
 
+    toxml = __unicode__
+    
+    def hilite(self):
+        return '<code class="markup doctype">&lt;!DOCTYPE %s></code>' % escape(self.name)
+
 class TextNode(Node):
     def __init__(self, value):
         Node.__init__(self, None)
@@ -100,6 +115,8 @@ class TextNode(Node):
 
     def toxml(self):
         return escape(self.value)
+    
+    hilite = toxml
 
 class Element(Node):
     def __init__(self, name):
@@ -109,16 +126,6 @@ class Element(Node):
     def __unicode__(self):
         return "<%s>" % self.name
 
-    def printTree(self, indent):
-        tree = '\n|%s%s' % (' '*indent, unicode(self))
-        indent += 2
-        if self.attributes:
-            for name, value in self.attributes.iteritems():
-                tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
-        for child in self.childNodes:
-            tree += child.printTree(indent)
-        return tree
-
     def toxml(self):
         result = '<' + self.name
         if self.attributes:
@@ -132,6 +139,29 @@ class Element(Node):
         else:
             result += '/>'
         return result
+    
+    def hilite(self):
+        result = '&lt;<code class="markup element-name">%s</code>' % escape(self.name)
+        if self.attributes:
+            for name, value in self.attributes.iteritems():
+                result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (escape(name), escape(value, {'"':'&quot;'}))
+        result += ">"  # always close the start tag, even for an empty element
+        if not self.childNodes and self.name in voidElements:
+            return result
+        for child in self.childNodes:
+            result += child.hilite()
+        # every element that is not an empty void element gets an explicit end tag
+        return result + '&lt;/<code class="markup element-name">%s</code>>' % escape(self.name)
+
+    def printTree(self, indent):
+        tree = '\n|%s%s' % (' '*indent, unicode(self))
+        indent += 2
+        if self.attributes:
+            for name, value in self.attributes.iteritems():
+                tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
+        for child in self.childNodes:
+            tree += child.printTree(indent)
+        return tree
 
 class CommentNode(Node):
     def __init__(self, data):
@@ -140,8 +170,12 @@ class CommentNode(Node):
 
     def __unicode__(self):
         return "<!-- %s -->" % self.data
+    
+    def toxml(self):
+        return "<!--%s-->" % self.data
 
-    toxml = __unicode__ 
+    def hilite(self):
+        return '<code class="markup comment">&lt;!--%s--></code>' % escape(self.data)
 
 class TreeBuilder(_base.TreeBuilder):
     documentClass = Document