# planet/planet/html5lib/constants.py
# 2007-01-11 15:05:30 -05:00
#
# 457 lines
# 9.8 KiB
# Python

import string
# Make sure the names `set` and `frozenset` exist on every supported
# interpreter. Referencing the bare name raises NameError when the built-in
# is missing, which selects the fallback below.
try:
    frozenset
except NameError:
    # Import from the sets module for python 2.3
    from sets import Set as set
    from sets import ImmutableSet as frozenset
# Sentinel bound to None. NOTE(review): presumably the value the input stream
# hands back once the document is exhausted -- confirm against the tokenizer.
EOF = None
# Content-model flag name -> small integer code. The codes are simply the
# positions of the names in the tuple below (PCDATA is 0, PLAINTEXT is 3).
contentModelFlags = dict(zip(
    ("PCDATA", "RCDATA", "CDATA", "PLAINTEXT"),
    range(4),
))
# Tag names in the "scoping" category (going by the variable name; the code
# that consults this set lives outside this file).
scopingElements = frozenset(
    "button caption html marquee object table td th".split()
)
# Tag names in the "formatting" category (inline presentational markup such
# as <b>, <i> and <font>).
formattingElements = frozenset(
    "a b big em font i nobr s small strike strong tt u".split()
)
# Tag names in the "special" category. The names are kept in one
# whitespace-separated literal; split() turns it into the individual strings,
# so the layout inside the literal is irrelevant.
specialElements = frozenset("""
    address area base basefont bgsound blockquote body br center col
    colgroup dd dir div dl dt embed fieldset form frame frameset
    h1 h2 h3 h4 h5 h6 head hr iframe image img input isindex li link
    listing menu meta noembed noframes noscript ol optgroup option p
    param plaintext pre script select spacer style tbody textarea
    tfoot thead title tr ul wbr
""".split())
# The whitespace characters recognised here: TAB, LF, VT (U+000B),
# FF (U+000C) and SPACE. Iterating the unicode literal yields one
# single-character string per member.
spaceCharacters = frozenset(u"\t\n\u000B\u000C ")
# Table-structure tag names. NOTE(review): the name suggests these switch the
# tree builder into a table-specific insertion mode -- confirm in the parser.
tableInsertModeElements = frozenset("table tbody tfoot thead tr".split())
# ASCII character classes as frozensets of single characters, built from the
# corresponding constants in the string module.
asciiLowercase, asciiLetters, digits, hexDigits = map(frozenset, (
    string.ascii_lowercase,
    string.ascii_letters,
    string.digits,
    string.hexdigits,
))
# Code-point table mapping ord("A")..ord("Z") to ord("a")..ord("z"). The two
# string constants list the letters in the same order, so pairing them
# position by position gives each capital its lower-case form.
asciiUpper2Lower = dict(zip(
    map(ord, string.ascii_uppercase),
    map(ord, string.ascii_lowercase),
))
# Heading tag names. Their order (h1 through h6) matters, so this is a tuple
# rather than a set.
headingElements = tuple("h1 h2 h3 h4 h5 h6".split())
# Tag names in the "void" category.
# XXX: should event-source and command be listed here as well?
voidElements = frozenset(
    "base link meta hr br img embed param area col input".split()
)
# Unicode code points for the Windows-1252 bytes 0x80-0x9F. The sequence has
# to stay ordered and indexable, so it is a tuple and not a frozenset:
# entry i belongs to byte 0x80 + i. Bytes with no Windows-1252 assignment
# map to 0xFFFD (U+FFFD REPLACEMENT CHARACTER).
entitiesWindows1252 = (
    0x20AC,  # 0x80  EURO SIGN
    0xFFFD,  # 0x81  UNDEFINED
    0x201A,  # 0x82  SINGLE LOW-9 QUOTATION MARK
    0x0192,  # 0x83  LATIN SMALL LETTER F WITH HOOK
    0x201E,  # 0x84  DOUBLE LOW-9 QUOTATION MARK
    0x2026,  # 0x85  HORIZONTAL ELLIPSIS
    0x2020,  # 0x86  DAGGER
    0x2021,  # 0x87  DOUBLE DAGGER
    0x02C6,  # 0x88  MODIFIER LETTER CIRCUMFLEX ACCENT
    0x2030,  # 0x89  PER MILLE SIGN
    0x0160,  # 0x8A  LATIN CAPITAL LETTER S WITH CARON
    0x2039,  # 0x8B  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x0152,  # 0x8C  LATIN CAPITAL LIGATURE OE
    0xFFFD,  # 0x8D  UNDEFINED
    0x017D,  # 0x8E  LATIN CAPITAL LETTER Z WITH CARON
    0xFFFD,  # 0x8F  UNDEFINED
    0xFFFD,  # 0x90  UNDEFINED
    0x2018,  # 0x91  LEFT SINGLE QUOTATION MARK
    0x2019,  # 0x92  RIGHT SINGLE QUOTATION MARK
    0x201C,  # 0x93  LEFT DOUBLE QUOTATION MARK
    0x201D,  # 0x94  RIGHT DOUBLE QUOTATION MARK
    0x2022,  # 0x95  BULLET
    0x2013,  # 0x96  EN DASH
    0x2014,  # 0x97  EM DASH
    0x02DC,  # 0x98  SMALL TILDE
    0x2122,  # 0x99  TRADE MARK SIGN
    0x0161,  # 0x9A  LATIN SMALL LETTER S WITH CARON
    0x203A,  # 0x9B  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x0153,  # 0x9C  LATIN SMALL LIGATURE OE
    0xFFFD,  # 0x9D  UNDEFINED
    0x017E,  # 0x9E  LATIN SMALL LETTER Z WITH CARON
    0x0178,  # 0x9F  LATIN CAPITAL LETTER Y WITH DIAERESIS
)
# Named character references: entity name (without the surrounding "&" and
# ";") -> the single character it stands for. Names are case-sensitive;
# amp, copy, gt, lt, quot and reg also have an all-capitals alias that maps
# to the same character.
entities = {
    "AElig": u"\u00C6", "Aacute": u"\u00C1", "Acirc": u"\u00C2",
    "Agrave": u"\u00C0", "Alpha": u"\u0391", "Aring": u"\u00C5",
    "Atilde": u"\u00C3", "Auml": u"\u00C4", "Beta": u"\u0392",
    "Ccedil": u"\u00C7", "Chi": u"\u03A7", "Dagger": u"\u2021",
    "Delta": u"\u0394", "ETH": u"\u00D0", "Eacute": u"\u00C9",
    "Ecirc": u"\u00CA", "Egrave": u"\u00C8", "Epsilon": u"\u0395",
    "Eta": u"\u0397", "Euml": u"\u00CB", "Gamma": u"\u0393",
    "Iacute": u"\u00CD", "Icirc": u"\u00CE", "Igrave": u"\u00CC",
    "Iota": u"\u0399", "Iuml": u"\u00CF", "Kappa": u"\u039A",
    "Lambda": u"\u039B", "Mu": u"\u039C", "Ntilde": u"\u00D1",
    "Nu": u"\u039D", "OElig": u"\u0152", "Oacute": u"\u00D3",
    "Ocirc": u"\u00D4", "Ograve": u"\u00D2", "Omega": u"\u03A9",
    "Omicron": u"\u039F", "Oslash": u"\u00D8", "Otilde": u"\u00D5",
    "Ouml": u"\u00D6", "Phi": u"\u03A6", "Pi": u"\u03A0",
    "Prime": u"\u2033", "Psi": u"\u03A8", "Rho": u"\u03A1",
    "Scaron": u"\u0160", "Sigma": u"\u03A3", "THORN": u"\u00DE",
    "Tau": u"\u03A4", "Theta": u"\u0398", "Uacute": u"\u00DA",
    "Ucirc": u"\u00DB", "Ugrave": u"\u00D9", "Upsilon": u"\u03A5",
    "Uuml": u"\u00DC", "Xi": u"\u039E", "Yacute": u"\u00DD",
    "Yuml": u"\u0178", "Zeta": u"\u0396", "aacute": u"\u00E1",
    "acirc": u"\u00E2", "acute": u"\u00B4", "aelig": u"\u00E6",
    "agrave": u"\u00E0", "alefsym": u"\u2135", "alpha": u"\u03B1",
    "amp": u"\u0026", "AMP": u"\u0026", "and": u"\u2227",
    "ang": u"\u2220", "apos": u"\u0027", "aring": u"\u00E5",
    "asymp": u"\u2248", "atilde": u"\u00E3", "auml": u"\u00E4",
    "bdquo": u"\u201E", "beta": u"\u03B2", "brvbar": u"\u00A6",
    "bull": u"\u2022", "cap": u"\u2229", "ccedil": u"\u00E7",
    "cedil": u"\u00B8", "cent": u"\u00A2", "chi": u"\u03C7",
    "circ": u"\u02C6", "clubs": u"\u2663", "cong": u"\u2245",
    "copy": u"\u00A9", "COPY": u"\u00A9", "crarr": u"\u21B5",
    "cup": u"\u222A", "curren": u"\u00A4", "dArr": u"\u21D3",
    "dagger": u"\u2020", "darr": u"\u2193", "deg": u"\u00B0",
    "delta": u"\u03B4", "diams": u"\u2666", "divide": u"\u00F7",
    "eacute": u"\u00E9", "ecirc": u"\u00EA", "egrave": u"\u00E8",
    "empty": u"\u2205", "emsp": u"\u2003", "ensp": u"\u2002",
    "epsilon": u"\u03B5", "equiv": u"\u2261", "eta": u"\u03B7",
    "eth": u"\u00F0", "euml": u"\u00EB", "euro": u"\u20AC",
    "exist": u"\u2203", "fnof": u"\u0192", "forall": u"\u2200",
    "frac12": u"\u00BD", "frac14": u"\u00BC", "frac34": u"\u00BE",
    "frasl": u"\u2044", "gamma": u"\u03B3", "ge": u"\u2265",
    "gt": u"\u003E", "GT": u"\u003E", "hArr": u"\u21D4",
    "harr": u"\u2194", "hearts": u"\u2665", "hellip": u"\u2026",
    "iacute": u"\u00ED", "icirc": u"\u00EE", "iexcl": u"\u00A1",
    "igrave": u"\u00EC", "image": u"\u2111", "infin": u"\u221E",
    "int": u"\u222B", "iota": u"\u03B9", "iquest": u"\u00BF",
    "isin": u"\u2208", "iuml": u"\u00EF", "kappa": u"\u03BA",
    "lArr": u"\u21D0", "lambda": u"\u03BB", "lang": u"\u2329",
    "laquo": u"\u00AB", "larr": u"\u2190", "lceil": u"\u2308",
    "ldquo": u"\u201C", "le": u"\u2264", "lfloor": u"\u230A",
    "lowast": u"\u2217", "loz": u"\u25CA", "lrm": u"\u200E",
    "lsaquo": u"\u2039", "lsquo": u"\u2018", "lt": u"\u003C",
    "LT": u"\u003C", "macr": u"\u00AF", "mdash": u"\u2014",
    "micro": u"\u00B5", "middot": u"\u00B7", "minus": u"\u2212",
    "mu": u"\u03BC", "nabla": u"\u2207", "nbsp": u"\u00A0",
    "ndash": u"\u2013", "ne": u"\u2260", "ni": u"\u220B",
    "not": u"\u00AC", "notin": u"\u2209", "nsub": u"\u2284",
    "ntilde": u"\u00F1", "nu": u"\u03BD", "oacute": u"\u00F3",
    "ocirc": u"\u00F4", "oelig": u"\u0153", "ograve": u"\u00F2",
    "oline": u"\u203E", "omega": u"\u03C9", "omicron": u"\u03BF",
    "oplus": u"\u2295", "or": u"\u2228", "ordf": u"\u00AA",
    "ordm": u"\u00BA", "oslash": u"\u00F8", "otilde": u"\u00F5",
    "otimes": u"\u2297", "ouml": u"\u00F6", "para": u"\u00B6",
    "part": u"\u2202", "permil": u"\u2030", "perp": u"\u22A5",
    "phi": u"\u03C6", "pi": u"\u03C0", "piv": u"\u03D6",
    "plusmn": u"\u00B1", "pound": u"\u00A3", "prime": u"\u2032",
    "prod": u"\u220F", "prop": u"\u221D", "psi": u"\u03C8",
    "quot": u"\u0022", "QUOT": u"\u0022", "rArr": u"\u21D2",
    "radic": u"\u221A", "rang": u"\u232A", "raquo": u"\u00BB",
    "rarr": u"\u2192", "rceil": u"\u2309", "rdquo": u"\u201D",
    "real": u"\u211C", "reg": u"\u00AE", "REG": u"\u00AE",
    "rfloor": u"\u230B", "rho": u"\u03C1", "rlm": u"\u200F",
    "rsaquo": u"\u203A", "rsquo": u"\u2019", "sbquo": u"\u201A",
    "scaron": u"\u0161", "sdot": u"\u22C5", "sect": u"\u00A7",
    "shy": u"\u00AD", "sigma": u"\u03C3", "sigmaf": u"\u03C2",
    "sim": u"\u223C", "spades": u"\u2660", "sub": u"\u2282",
    "sube": u"\u2286", "sum": u"\u2211", "sup": u"\u2283",
    "sup1": u"\u00B9", "sup2": u"\u00B2", "sup3": u"\u00B3",
    "supe": u"\u2287", "szlig": u"\u00DF", "tau": u"\u03C4",
    "there4": u"\u2234", "theta": u"\u03B8", "thetasym": u"\u03D1",
    "thinsp": u"\u2009", "thorn": u"\u00FE", "tilde": u"\u02DC",
    "times": u"\u00D7", "trade": u"\u2122", "uArr": u"\u21D1",
    "uacute": u"\u00FA", "uarr": u"\u2191", "ucirc": u"\u00FB",
    "ugrave": u"\u00F9", "uml": u"\u00A8", "upsih": u"\u03D2",
    "upsilon": u"\u03C5", "uuml": u"\u00FC", "weierp": u"\u2118",
    "xi": u"\u03BE", "yacute": u"\u00FD", "yen": u"\u00A5",
    "yuml": u"\u00FF", "zeta": u"\u03B6", "zwj": u"\u200D",
    "zwnj": u"\u200C",
}