diff --git a/planet/feedparser.py b/planet/feedparser.py
index 1860539..b3b2467 100755
--- a/planet/feedparser.py
+++ b/planet/feedparser.py
@@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
Recommended: CJKCodecs and iconv_codec
"""
-__version__ = "4.2-pre-" + "$Revision: 1.146 $"[11:16] + "-cvs"
+__version__ = "4.2-pre-" + "$Revision: 1.147 $"[11:16] + "-cvs"
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@@ -2303,19 +2303,20 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd',
'descent', 'display', 'dur', 'end', 'fill', 'fill-rule', 'font-family',
'font-size', 'font-stretch', 'font-style', 'font-variant',
- 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'hanging',
- 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
- 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'mathematical', 'max',
- 'min', 'name', 'offset', 'opacity', 'origin', 'overline-position',
- 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
- 'preserveAspectRatio', 'r', 'repeatCount', 'repeatDur',
- 'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx',
- 'ry', 'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity',
- 'strikethrough-position', 'strikethrough-thickness', 'stroke',
- 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
- 'stroke-linejoin', 'stroke-miterlimit', 'stroke-width',
- 'systemLanguage', 'target', 'text-anchor', 'to', 'transform', 'type',
- 'u1', 'u2', 'underline-position', 'underline-thickness', 'unicode',
+ 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name',
+ 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x',
+ 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes',
+ 'lang', 'mathematical', 'max', 'min', 'name', 'offset', 'opacity',
+ 'origin', 'overline-position', 'overline-thickness', 'panose-1',
+ 'path', 'pathLength', 'points', 'preserveAspectRatio', 'r',
+ 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures',
+ 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv',
+ 'stop-color', 'stop-opacity', 'strikethrough-position',
+ 'strikethrough-thickness', 'stroke', 'stroke-dasharray',
+ 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
+ 'stroke-miterlimit', 'stroke-width', 'systemLanguage', 'target',
+ 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2',
+ 'underline-position', 'underline-thickness', 'unicode',
'unicode-range', 'units-per-em', 'values', 'version', 'viewBox',
'visibility', 'width', 'widths', 'x', 'x-height', 'x1', 'x2',
'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role',
@@ -3021,6 +3022,21 @@ _additional_timezones = {'AT': -400, 'ET': -500, 'CT': -600, 'MT': -700, 'PT': -
rfc822._timezones.update(_additional_timezones)
registerDateHandler(_parse_date_rfc822)
+def _parse_date_perforce(aDateString):
+ """parse a date in yyyy/mm/dd hh:mm:ss TTT format"""
+ # Fri, 2006/09/15 08:19:53 EDT
+ _my_date_pattern = re.compile( \
+ r'(\w{,3}), (\d{,4})/(\d{,2})/(\d{2}) (\d{,2}):(\d{2}):(\d{2}) (\w{,3})')
+
+ dow, year, month, day, hour, minute, second, tz = \
+ _my_date_pattern.search(aDateString).groups()
+ months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+ dateString = "%s, %s %s %s %s:%s:%s %s" % (dow, day, months[int(month) - 1], year, hour, minute, second, tz)
+ tm = rfc822.parsedate_tz(dateString)
+ if tm:
+ return time.gmtime(rfc822.mktime_tz(tm))
+registerDateHandler(_parse_date_perforce)
+
def _parse_date(dateString):
'''Parses a variety of date formats into a 9-tuple in GMT'''
for handler in _date_handlers:
diff --git a/planet/html5lib/html5parser.py b/planet/html5lib/html5parser.py
index 6e1e70a..e075001 100644
--- a/planet/html5lib/html5parser.py
+++ b/planet/html5lib/html5parser.py
@@ -37,10 +37,10 @@ class HTMLParser(object):
def __init__(self, strict = False, tree=simpletree.TreeBuilder):
"""
- strict - raise an exception when a parse error is encountered
-
- tree - a treebuilder class controlling the type of tree that will be
- returned. This class is almost always a subclass of
+ strict - raise an exception when a parse error is encountered
+
+ tree - a treebuilder class controlling the type of tree that will be
+ returned. This class is almost always a subclass of
html5lib.treebuilders._base.TreeBuilder
"""
@@ -72,10 +72,10 @@ class HTMLParser(object):
def parse(self, stream, encoding=None, innerHTML=False):
"""Parse a HTML document into a well-formed tree
-
+
stream - a filelike object or string containing the HTML to be parsed
-
- innerHTML - Are we parsing in innerHTML mode (note innerHTML=True
+
+ innerHTML - Are we parsing in innerHTML mode (note innerHTML=True
is not yet supported)
The optional encoding parameter must be a string that indicates
@@ -85,6 +85,7 @@ class HTMLParser(object):
"""
self.tree.reset()
+ self.firstStartTag = False
self.errors = []
self.phase = self.phases["initial"]
@@ -119,8 +120,8 @@ class HTMLParser(object):
return self.tree.getDocument()
def parseError(self, data="XXX ERROR MESSAGE NEEDED"):
- # The idea is to make data mandatory.
- self.errors.append(data)
+ # XXX The idea is to make data mandatory.
+ self.errors.append((self.tokenizer.stream.position(), data))
if self.strict:
raise ParseError
@@ -130,7 +131,7 @@ class HTMLParser(object):
def normalizeToken(self, token):
""" HTML5 specific normalizations to the token stream """
-
+
if token["type"] == "EmptyTag":
# When a solidus (/) is encountered within a tag name what happens
# depends on whether the current tag name matches that of a void
@@ -159,14 +160,12 @@ class HTMLParser(object):
token["data"] = {}
elif token["type"] == "EndTag":
+ if token["data"]:
+ self.parseError(_("End tag contains unexpected attributes."))
token["name"] = token["name"].lower()
return token
- #XXX - almost everthing after this point should be moved into a
- #seperate treebuilder object
-
-
def resetInsertionMode(self):
# The name of this method is mostly historical. (It's also used in the
# specification.)
@@ -231,13 +230,19 @@ class Phase(object):
def processEOF(self):
self.tree.generateImpliedEndTags()
- if self.parser.innerHTML == True and len(self.tree.openElements) > 1:
- # XXX No need to check for "body" because our EOF handling is not
- # per specification. (Specification needs an update.)
- #
- # XXX Need to check this more carefully in the future.
- self.parser.parseError()
- # Stop parsing
+ if len(self.tree.openElements) > 2:
+ self.parser.parseError(_("Unexpected end of file. "
+ u"Missing closing tags."))
+ elif len(self.tree.openElements) == 2 and\
+ self.tree.openElements[1].name != "body":
+ # This happens for framesets or something?
+ self.parser.parseError(_("Unexpected end of file. Expected end "
+ u"tag (" + self.tree.openElements[1].name + u") first."))
+ elif self.parser.innerHTML and len(self.tree.openElements) > 1 :
+ # XXX This is not what the specification says. Not sure what to do
+ # here.
+ self.parser.parseError(_("XXX innerHTML EOF"))
+ # Betting ends.
def processComment(self, data):
# For most phases the following is correct. Where it's not it will be
@@ -245,7 +250,7 @@ class Phase(object):
self.tree.insertComment(data, self.tree.openElements[-1])
def processDoctype(self, name, error):
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected DOCTYPE. Ignored."))
def processSpaceCharacters(self, data):
self.tree.insertText(data)
@@ -254,11 +259,14 @@ class Phase(object):
self.startTagHandler[name](name, attributes)
def startTagHtml(self, name, attributes):
+ if self.parser.firstStartTag == False and name == "html":
+ self.parser.parseError(_("html needs to be the first start tag."))
# XXX Need a check here to see if the first start tag token emitted is
# this token... If it's not, invoke self.parser.parseError().
for attr, value in attributes.iteritems():
if attr not in self.tree.openElements[0].attributes:
self.tree.openElements[0].attributes[attr] = value
+ self.parser.firstStartTag = False
def processEndTag(self, name):
self.endTagHandler[name](name)
@@ -270,7 +278,7 @@ class InitialPhase(Phase):
# "quirks mode". It is expected that a future version of HTML5 will defin
# this.
def processEOF(self):
- self.parser.parseError(_("No DOCTYPE seen."))
+ self.parser.parseError(_(u"Unexpected End of file. Expected DOCTYPE."))
self.parser.phase = self.parser.phases["rootElement"]
self.parser.phase.processEOF()
@@ -279,7 +287,7 @@ class InitialPhase(Phase):
def processDoctype(self, name, error):
if error:
- self.parser.parseError(_("DOCTYPE is in error."))
+ self.parser.parseError(_("Erroneous DOCTYPE."))
self.tree.insertDoctype(name)
self.parser.phase = self.parser.phases["rootElement"]
@@ -287,17 +295,20 @@ class InitialPhase(Phase):
self.tree.insertText(data, self.tree.document)
def processCharacters(self, data):
- self.parser.parseError(_("No DOCTYPE seen."))
+ self.parser.parseError(_(u"Unexpected non-space characters. "
+ u"Expected DOCTYPE."))
self.parser.phase = self.parser.phases["rootElement"]
self.parser.phase.processCharacters(data)
def processStartTag(self, name, attributes):
- self.parser.parseError(_("No DOCTYPE seen."))
+ self.parser.parseError(_(u"Unexpected start tag (" + name +\
+ u"). Expected DOCTYPE."))
self.parser.phase = self.parser.phases["rootElement"]
self.parser.phase.processStartTag(name, attributes)
def processEndTag(self, name):
- self.parser.parseError(_("No DOCTYPE seen."))
+ self.parser.parseError(_(u"Unexpected end tag (" + name +\
+ "). Expected DOCTYPE."))
self.parser.phase = self.parser.phases["rootElement"]
self.parser.phase.processEndTag(name)
@@ -326,6 +337,8 @@ class RootElementPhase(Phase):
self.parser.phase.processCharacters(data)
def processStartTag(self, name, attributes):
+ if name == "html":
+ self.parser.firstStartTag = True
self.insertHtmlElement()
self.parser.phase.processStartTag(name, attributes)
@@ -372,7 +385,7 @@ class BeforeHeadPhase(Phase):
def endTagOther(self, name):
self.parser.parseError(_("Unexpected end tag (" + name +\
- ") after the root element."))
+ ") after the (implied) root element."))
class InHeadPhase(Phase):
def __init__(self, parser, tree):
@@ -380,7 +393,8 @@ class InHeadPhase(Phase):
self.startTagHandler = utils.MethodDispatcher([
("html", self.startTagHtml),
- (("title", "style"), self.startTagTitleStyle),
+ ("title", self.startTagTitle),
+ ("style", self.startTagStyle),
("script", self.startTagScript),
(("base", "link", "meta"), self.startTagBaseLinkMeta),
("head", self.startTagHead)
@@ -405,6 +419,8 @@ class InHeadPhase(Phase):
# the real thing
def processEOF(self):
if self.tree.openElements[-1].name in ("title", "style", "script"):
+ self.parser.parseError(_(u"Unexpected end of file. "
+ u"Expected end tag (" + self.tree.openElements[-1].name + ")."))
self.tree.openElements.pop()
self.anythingElse()
self.parser.phase.processEOF()
@@ -421,25 +437,31 @@ class InHeadPhase(Phase):
self.tree.headPointer = self.tree.openElements[-1]
self.parser.phase = self.parser.phases["inHead"]
- def startTagTitleStyle(self, name, attributes):
- cmFlags = {"title":"RCDATA", "style":"CDATA"}
+ def startTagTitle(self, name, attributes):
element = self.tree.createElement(name, attributes)
self.appendToHead(element)
self.tree.openElements.append(element)
- self.parser.tokenizer.contentModelFlag =\
- contentModelFlags[cmFlags[name]]
+ self.parser.tokenizer.contentModelFlag = contentModelFlags["RCDATA"]
+
+ def startTagStyle(self, name, attributes):
+ element = self.tree.createElement(name, attributes)
+ if self.tree.headPointer is not None and\
+ self.parser.phase == self.parser.phases["inHead"]:
+ self.appendToHead(element)
+ else:
+ self.tree.openElements[-1].appendChild(element)
+ self.tree.openElements.append(element)
+ self.parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]
def startTagScript(self, name, attributes):
element = self.tree.createElement(name, attributes)
element._flags.append("parser-inserted")
-
- # XXX in theory we should check if we're actually in the InHead state
- # here and if the headElementPointer is not zero but it seems to work
- # without that being the case.
- self.tree.openElements[-1].appendChild(element)
+ if self.tree.headPointer is not None and\
+ self.parser.phase == self.parser.phases["inHead"]:
+ self.appendToHead(element)
+ else:
+ self.tree.openElements[-1].appendChild(element)
self.tree.openElements.append(element)
-
- # XXX AT we could use self.tree.insertElement(name, attributes) ...
self.parser.tokenizer.contentModelFlag = contentModelFlags["CDATA"]
def startTagBaseLinkMeta(self, name, attributes):
@@ -454,7 +476,7 @@ class InHeadPhase(Phase):
if self.tree.openElements[-1].name == "head":
self.tree.openElements.pop()
else:
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected end tag (head). Ignored."))
self.parser.phase = self.parser.phases["afterHead"]
def endTagHtml(self, name):
@@ -465,11 +487,12 @@ class InHeadPhase(Phase):
if self.tree.openElements[-1].name == name:
self.tree.openElements.pop()
else:
- self.parser.parseError(_("Unexpected end tag " + name +\
- ". Ignored."))
+ self.parser.parseError(_(u"Unexpected end tag (" + name +\
+ "). Ignored."))
def endTagOther(self, name):
- self.parser.parseError(_("Unexpected end tag " + name + ". Ignored."))
+ self.parser.parseError(_(u"Unexpected end tag (" + name +\
+ "). Ignored."))
def anythingElse(self):
if self.tree.openElements[-1].name == "head":
@@ -507,7 +530,8 @@ class AfterHeadPhase(Phase):
self.parser.phase = self.parser.phases["inFrameset"]
def startTagFromHead(self, name, attributes):
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected start tag (" + name +\
+ ") that can be in head. Moved."))
self.parser.phase = self.parser.phases["inHead"]
self.parser.phase.processStartTag(name, attributes)
@@ -531,8 +555,8 @@ class InBodyPhase(Phase):
Phase.__init__(self, parser, tree)
self.startTagHandler = utils.MethodDispatcher([
("html", self.startTagHtml),
- ("script", self.startTagScript),
- (("base", "link", "meta", "style", "title"),
+ (("script", "style"), self.startTagScriptStyle),
+ (("base", "link", "meta", "title"),
self.startTagFromHead),
("body", self.startTagBody),
(("address", "blockquote", "center", "dir", "div", "dl",
@@ -578,11 +602,12 @@ class InBodyPhase(Phase):
(("a", "b", "big", "em", "font", "i", "nobr", "s", "small",
"strike", "strong", "tt", "u"), self.endTagFormatting),
(("marquee", "object", "button"), self.endTagButtonMarqueeObject),
- (("caption", "col", "colgroup", "frame", "frameset", "head",
- "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
- "tr", "area", "basefont", "bgsound", "br", "embed", "hr",
- "image", "img", "input", "isindex", "param", "select", "spacer",
- "table", "wbr"),self.endTagMisplacedNone),
+ (("head", "frameset", "select", "optgroup", "option", "table",
+ "caption", "colgroup", "col", "thead", "tfoot", "tbody", "tr",
+ "td", "th"), self.endTagMisplaced),
+ (("area", "basefont", "bgsound", "br", "embed", "hr", "image",
+ "img", "input", "isindex", "param", "spacer", "wbr", "frame"),
+ self.endTagNone),
(("noframes", "noscript", "noembed", "textarea", "xmp", "iframe"),
self.endTagCdataTextAreaXmp),
(("event-source", "section", "nav", "article", "aside", "header",
@@ -604,16 +629,16 @@ class InBodyPhase(Phase):
self.tree.reconstructActiveFormattingElements()
self.tree.insertText(data)
- def startTagScript(self, name, attributes):
+ def startTagScriptStyle(self, name, attributes):
self.parser.phases["inHead"].processStartTag(name, attributes)
def startTagFromHead(self, name, attributes):
- self.parser.parseError(_("Unexpected start tag " + name +\
- " that belongs in the head. Moved."))
+ self.parser.parseError(_(u"Unexpected start tag (" + name +\
+ ") that belongs in the head. Moved."))
self.parser.phases["inHead"].processStartTag(name, attributes)
def startTagBody(self, name, attributes):
- self.parser.parseError(_("Unexpected start tag body"))
+ self.parser.parseError(_(u"Unexpected start tag (body)."))
if len(self.tree.openElements) == 1 \
or self.tree.openElements[1].name != "body":
assert self.parser.innerHTML
@@ -629,7 +654,7 @@ class InBodyPhase(Phase):
def startTagForm(self, name, attributes):
if self.tree.formPointer:
- self.parser.parseError()
+ self.parser.parseError("Unexpected start tag (form). Ignored.")
else:
if self.tree.elementInScope("p"):
self.endTagP("p")
@@ -667,7 +692,8 @@ class InBodyPhase(Phase):
self.endTagP("p")
for item in headingElements:
if self.tree.elementInScope(item):
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected start tag (" + name +\
+ ")."))
item = self.tree.openElements.pop()
while item.name not in headingElements:
item = self.tree.openElements.pop()
@@ -677,7 +703,8 @@ class InBodyPhase(Phase):
def startTagA(self, name, attributes):
afeAElement = self.tree.elementInActiveFormattingElements("a")
if afeAElement:
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected start tag (a) implies "
+ "end tag (a)."))
self.endTagFormatting("a")
if afeAElement in self.tree.openElements:
self.tree.openElements.remove(afeAElement)
@@ -692,8 +719,8 @@ class InBodyPhase(Phase):
def startTagButton(self, name, attributes):
if self.tree.elementInScope("button"):
- self.parser.parseError(_("Unexpected start tag button. Implying"
- "button end tag."))
+ self.parser.parseError(_("Unexpected start tag (button) implied "
+ "end tag (button)."))
self.processEndTag("button")
self.parser.phase.processStartTag(name, attributes)
else:
@@ -730,8 +757,8 @@ class InBodyPhase(Phase):
def startTagImage(self, name, attributes):
# No really...
- self.parser.parseError(_("Unexpected start tag image. Use img "
- "instead"))
+ self.parser.parseError(_(u"Unexpected start tag (image). Treated "
+ u"as img."))
self.processStartTag("img", attributes)
def startTagInput(self, name, attributes):
@@ -783,7 +810,8 @@ class InBodyPhase(Phase):
"option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
"tr", "noscript"
"""
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected start tag (" + name +\
+ u"). Ignored."))
def startTagNew(self, name, other):
"""New HTML5 elements, "event-source", "section", "nav",
@@ -798,7 +826,7 @@ class InBodyPhase(Phase):
def endTagP(self, name):
self.tree.generateImpliedEndTags("p")
if self.tree.openElements[-1].name != "p":
- self.parser.parseError()
+ self.parser.parseError("Unexpected end tag (p).")
while self.tree.elementInScope("p"):
self.tree.openElements.pop()
@@ -811,7 +839,8 @@ class InBodyPhase(Phase):
self.parser.parseError()
return
if self.tree.openElements[-1].name != "body":
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected end tag (body). Missing "
+ u"end tag (" + self.tree.openElements[-1].name + ")."))
self.parser.phase = self.parser.phases["afterBody"]
def endTagHtml(self, name):
@@ -824,7 +853,8 @@ class InBodyPhase(Phase):
if inScope:
self.tree.generateImpliedEndTags()
if self.tree.openElements[-1].name != name:
- self.parser.parseError()
+ self.parser.parseError((u"End tag (" + name + ") seen too "
+ u"early. Expected other end tag."))
if inScope:
node = self.tree.openElements.pop()
while node.name != name:
@@ -839,7 +869,8 @@ class InBodyPhase(Phase):
if self.tree.elementInScope(name):
self.tree.generateImpliedEndTags(name)
if self.tree.openElements[-1].name != name:
- self.parser.parseError()
+ self.parser.parseError((u"End tag (" + name + ") seen too "
+ u"early. Expected other end tag."))
if self.tree.elementInScope(name):
node = self.tree.openElements.pop()
@@ -852,7 +883,8 @@ class InBodyPhase(Phase):
self.tree.generateImpliedEndTags()
break
if self.tree.openElements[-1].name != name:
- self.parser.parseError()
+ self.parser.parseError((u"Unexpected end tag (" + name + "). "
+ u"Expected other end tag."))
for item in headingElements:
if self.tree.elementInScope(item):
@@ -864,23 +896,28 @@ class InBodyPhase(Phase):
def endTagFormatting(self, name):
"""The much-feared adoption agency algorithm
"""
+ # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
+ # XXX Better parseError messages appreciated.
while True:
# Step 1 paragraph 1
afeElement = self.tree.elementInActiveFormattingElements(name)
if not afeElement or (afeElement in self.tree.openElements and
not self.tree.elementInScope(afeElement.name)):
- self.parser.parseError()
+ self.parser.parseError(_(u"End tag (" + name + ") violates "
+ u" step 1, paragraph 1 of the adoption agency algorithm."))
return
# Step 1 paragraph 2
elif afeElement not in self.tree.openElements:
- self.parser.parseError()
+ self.parser.parseError(_(u"End tag (" + name + ") violates "
+ u" step 1, paragraph 2 of the adoption agency algorithm."))
self.tree.activeFormattingElements.remove(afeElement)
return
# Step 1 paragraph 3
if afeElement != self.tree.openElements[-1]:
- self.parser.parseError()
+ self.parser.parseError(_(u"End tag (" + name + ") violates "
+ u" step 1, paragraph 3 of the adoption agency algorithm."))
# Step 2
# Start of the adoption agency algorithm proper
@@ -979,7 +1016,8 @@ class InBodyPhase(Phase):
if self.tree.elementInScope(name):
self.tree.generateImpliedEndTags()
if self.tree.openElements[-1].name != name:
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected end tag (" + name +\
+ "). Expected other end tag first."))
if self.tree.elementInScope(name):
element = self.tree.openElements.pop()
@@ -987,24 +1025,21 @@ class InBodyPhase(Phase):
element = self.tree.openElements.pop()
self.tree.clearActiveFormattingElements()
- def endTagMisplacedNone(self, name):
- """ Elements that should be children of other elements that have a
- different insertion mode or elements that have no end tag;
- here they are ignored
- "caption", "col", "colgroup", "frame", "frameset", "head",
- "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
- "tr", "noscript, "area", "basefont", "bgsound", "br", "embed",
- "hr", "iframe", "image", "img", "input", "isindex", "noembed",
- "noframes", "param", "select", "spacer", "table", "textarea", "wbr""
- """
- self.parser.parseError()
+ def endTagMisplaced(self, name):
+ # This handles elements with end tags in other insertion modes.
+ self.parser.parseError(_(u"Unexpected end tag (" + name +\
+ u"). Ignored."))
+
+ def endTagNone(self, name):
+ # This handles elements with no end tag.
+ self.parser.parseError(_(u"This tag (" + name + u") has no end tag"))
def endTagCdataTextAreaXmp(self, name):
if self.tree.openElements[-1].name == name:
self.tree.openElements.pop()
else:
- self.parser.parseError(_("Unexpected end tag " + name +\
- ". Ignored."))
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ "). Ignored."))
def endTagNew(self, name):
"""New HTML5 elements, "event-source", "section", "nav",
@@ -1019,14 +1054,15 @@ class InBodyPhase(Phase):
if node.name == name:
self.tree.generateImpliedEndTags()
if self.tree.openElements[-1].name != name:
- self.parser.parseError(_("Unexpected end tag " + name +\
- "."))
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ ")."))
while self.tree.openElements.pop() != node:
pass
break
else:
if node.name in specialElements | scopingElements:
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected end tag (" + name +\
+ "). Ignored."))
break
class InTablePhase(Phase):
@@ -1055,13 +1091,15 @@ class InTablePhase(Phase):
def clearStackToTableContext(self):
# "clear the stack back to a table context"
while self.tree.openElements[-1].name not in ("table", "html"):
+ self.parser.parseError(_(u"Unexpected implied end tag (" +\
+ self.tree.openElements[-1].name + u") in the table phase."))
self.tree.openElements.pop()
- self.parser.parseError()
# When the current node is it's an innerHTML case
# processing methods
def processCharacters(self, data):
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected non-space characters in "
+ u"table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
self.tree.insertFromTable = True
# Process the character in the "in body" mode
@@ -1099,7 +1137,8 @@ class InTablePhase(Phase):
self.parser.phase.processStartTag(name, attributes)
def startTagOther(self, name, attributes):
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected start tag (" + name + u") in "
+ u"table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
self.tree.insertFromTable = True
# Process the start tag in the "in body" mode
@@ -1109,7 +1148,7 @@ class InTablePhase(Phase):
def endTagTable(self, name):
if self.tree.elementInScope("table", True):
self.tree.generateImpliedEndTags()
- if self.tree.openElements[-1].name == "table":
+ if self.tree.openElements[-1].name != "table":
self.parser.parseError()
while self.tree.openElements[-1].name != "table":
self.tree.openElements.pop()
@@ -1120,9 +1159,12 @@ class InTablePhase(Phase):
# innerHTML case
def endTagIgnore(self, name):
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ "). Ignored."))
def endTagOther(self, name):
+ self.parser.parseError(_(u"Unexpected end tag (" + name + u") in "
+ u"table context caused voodoo mode."))
# Make all the special element rearranging voodoo kick in
self.parser.insertFromTable = True
# Process the end tag in the "in body" mode
@@ -1169,10 +1211,12 @@ class InCaptionPhase(Phase):
if self.tree.elementInScope(name, True):
# AT this code is quite similar to endTagTable in "InTable"
self.tree.generateImpliedEndTags()
- if self.tree.openElements[-1].name == "caption":
- self.parser.parseError()
+ if self.tree.openElements[-1].name != "caption":
+ self.parser.parseError(_(u"Unexpected end tag (caption). "
+ u"Missing end tags."))
while self.tree.openElements[-1].name != "caption":
self.tree.openElements.pop()
+ self.tree.openElements.pop()
self.tree.clearActiveFormattingElements()
self.parser.phase = self.parser.phases["inTable"]
else:
@@ -1187,7 +1231,8 @@ class InCaptionPhase(Phase):
self.parser.phase.processStartTag(name, attributes)
def endTagIgnore(self, name):
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ "). Ignored."))
def endTagOther(self, name):
self.parser.phases["inBody"].processEndTag(name)
@@ -1236,7 +1281,8 @@ class InColumnGroupPhase(Phase):
self.parser.phase = self.parser.phases["inTable"]
def endTagCol(self, name):
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected end tag (col). "
+ u"col has no end tag."))
def endTagOther(self, name):
self.endTagColgroup("colgroup")
@@ -1269,8 +1315,9 @@ class InTableBodyPhase(Phase):
def clearStackToTableBodyContext(self):
while self.tree.openElements[-1].name not in ("tbody", "tfoot",
"thead", "html"):
+ self.parser.parseError(_(u"Unexpected implied end tag (" +\
+ self.tree.openElements[-1].name + u") in the table body phase."))
self.tree.openElements.pop()
- self.parser.parseError()
# the rest
def processCharacters(self,data):
@@ -1282,7 +1329,8 @@ class InTableBodyPhase(Phase):
self.parser.phase = self.parser.phases["inRow"]
def startTagTableCell(self, name, attributes):
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected table cell start tag (" +\
+ name + u") in the table body phase."))
self.startTagTr("tr", {})
self.parser.phase.processStartTag(name, attributes)
@@ -1307,7 +1355,8 @@ class InTableBodyPhase(Phase):
self.tree.openElements.pop()
self.parser.phase = self.parser.phases["inTable"]
else:
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ ") in the table body phase. Ignored."))
def endTagTable(self, name):
if self.tree.elementInScope("tbody", True) or \
@@ -1321,7 +1370,8 @@ class InTableBodyPhase(Phase):
self.parser.parseError()
def endTagIgnore(self, name):
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ ") in the table body phase. Ignored."))
def endTagOther(self, name):
self.parser.phases["inTable"].processEndTag(name)
@@ -1351,8 +1401,9 @@ class InRowPhase(Phase):
# helper methods (XXX unify this with other table helper methods)
def clearStackToTableRowContext(self):
while self.tree.openElements[-1].name not in ("tr", "html"):
+ self.parser.parseError(_(u"Unexpected implied end tag (" +\
+ self.tree.openElements[-1].name + u") in the row phase."))
self.tree.openElements.pop()
- self.parser.parseError()
# the rest
def processCharacters(self, data):
@@ -1398,7 +1449,8 @@ class InRowPhase(Phase):
self.parser.parseError()
def endTagIgnore(self, name):
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ u") in the row phase. Ignored."))
def endTagOther(self, name):
self.parser.phases["inTable"].processEndTag(name)
@@ -1452,7 +1504,8 @@ class InCellPhase(Phase):
if self.tree.elementInScope(name, True):
self.tree.generateImpliedEndTags(name)
if self.tree.openElements[-1].name != name:
- self.parser.parseError()
+ self.parser.parseError("Got table cell end tag (" + name +\
+ ") while required end tags are missing.")
while True:
node = self.tree.openElements.pop()
if node.name == name:
@@ -1462,10 +1515,12 @@ class InCellPhase(Phase):
self.tree.clearActiveFormattingElements()
self.parser.phase = self.parser.phases["inRow"]
else:
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ "). Ignored."))
def endTagIgnore(self, name):
- self.parser.parseError()
+ self.parser.parseError(_("Unexpected end tag (" + name +\
+ "). Ignored."))
def endTagImply(self, name):
if self.tree.elementInScope(name, True):
@@ -1492,7 +1547,7 @@ class InSelectPhase(Phase):
("optgroup", self.startTagOptgroup),
("select", self.startTagSelect)
])
- self.startTagHandler.default = self.processAnythingElse
+ self.startTagHandler.default = self.startTagOther
self.endTagHandler = utils.MethodDispatcher([
("option", self.endTagOption),
@@ -1501,7 +1556,7 @@ class InSelectPhase(Phase):
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td",
"th"), self.endTagTableElements)
])
- self.endTagHandler.default = self.processAnythingElse
+ self.endTagHandler.default = self.endTagOther
# http://www.whatwg.org/specs/web-apps/current-work/#in-select
def processCharacters(self, data):
@@ -1521,14 +1576,20 @@ class InSelectPhase(Phase):
self.tree.insertElement(name, attributes)
def startTagSelect(self, name, attributes):
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected start tag (select) in the "
+ u"select phase implies select start tag."))
self.endTagSelect("select")
+ def startTagOther(self, name, attributes):
+ self.parser.parseError(_(u"Unexpected start tag token (" + name +\
+ u") in the select phase. Ignored."))
+
def endTagOption(self, name):
if self.tree.openElements[-1].name == "option":
self.tree.openElements.pop()
else:
- self.parser.parseError()
+ self.parser.parseError(_(u"Unexpected end tag (option) in the "
+ u"select phase. Ignored."))
def endTagOptgroup(self, name):
# implicitly closes