More Python 2.5 fixes

This commit is contained in:
Sam Ruby 2006-09-20 21:01:30 -04:00
parent 619719a585
commit 6bf282eab6
2 changed files with 19 additions and 2 deletions

View File

@ -66,6 +66,9 @@ except:
if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1])) if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
name2codepoint[name]=ord(codepoint) name2codepoint[name]=ord(codepoint)
# Python 2.2 support: make sure the name ``basestring`` is always defined.
# Probe the name directly instead of using hasattr(__builtins__, ...):
# __builtins__ is the builtins *module* only inside __main__; in an imported
# module it is normally that module's __dict__, so hasattr() on it is always
# False and the genuine basestring would be shadowed by str.
try:
    basestring
except NameError:
    # No basestring builtin on this interpreter; fall back to str.
    basestring = str
# This RE makes Beautiful Soup able to parse XML with namespaces. # This RE makes Beautiful Soup able to parse XML with namespaces.
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
@ -870,7 +873,7 @@ def isString(s):
"""Convenience method that works with all 2.x versions of Python """Convenience method that works with all 2.x versions of Python
to determine whether or not something is stringlike.""" to determine whether or not something is stringlike."""
try: try:
return isinstance(s, unicode) or isintance(s, basestring) return isinstance(s, unicode) or isinstance(s, basestring)
except NameError: except NameError:
return isinstance(s, str) return isinstance(s, str)
@ -1285,6 +1288,12 @@ class BeautifulStoneSoup(Tag, SGMLParser):
j = i + len(toHandle) j = i + len(toHandle)
return j return j
def convert_charref(self, name):
    """Rebuild the literal text of a numeric character reference.

    `name` is the part between '&#' and ';'. The reference is returned
    as written rather than decoded.
    NOTE(review): presumably overrides the sgmllib.SGMLParser hook added
    in Python 2.5 -- confirm against the base class.
    """
    reference = '&#%s;' % name
    return reference
def convert_entityref(self, name):
    """Rebuild the literal text of a named entity reference.

    `name` is the part between '&' and ';'. The reference is returned
    as written rather than decoded.
    NOTE(review): presumably overrides the sgmllib.SGMLParser hook added
    in Python 2.5 -- confirm against the base class.
    """
    reference = '&%s;' % name
    return reference
class BeautifulSoup(BeautifulStoneSoup): class BeautifulSoup(BeautifulStoneSoup):
"""This parser knows the following facts about HTML: """This parser knows the following facts about HTML:
@ -1655,6 +1664,8 @@ class UnicodeDammit:
'''Given a string and its encoding, decodes the string into Unicode. '''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases''' %encoding is a string recognized by encodings.aliases'''
if not data: return u''
# strip Byte Order Mark (if present) # strip Byte Order Mark (if present)
if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
and (data[2:4] != '\x00\x00'): and (data[2:4] != '\x00\x00'):

View File

@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/> Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
""" """
__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs" __version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. __license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification, Redistribution and use in source and binary forms, with or without modification,
@ -1715,6 +1715,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
# self.updatepos(declstartpos, i) # self.updatepos(declstartpos, i)
return None, -1 return None, -1
def convert_charref(self, name):
    '''Return the numeric character reference for name, left undecoded.

    name is the text between '&#' and ';'; the result is that same
    reference spelled out again.
    NOTE(review): presumably overrides the sgmllib.SGMLParser hook added
    in Python 2.5 -- confirm against the base class.
    '''
    rebuilt = '&#%s;' % name
    return rebuilt
def convert_entityref(self, name):
    '''Return the named entity reference for name, left undecoded.

    name is the text between '&' and ';'; the result is that same
    reference spelled out again.
    NOTE(review): presumably overrides the sgmllib.SGMLParser hook added
    in Python 2.5 -- confirm against the base class.
    '''
    rebuilt = '&%s;' % name
    return rebuilt
def output(self): def output(self):
'''Return processed HTML as a single string''' '''Return processed HTML as a single string'''
return ''.join([str(p) for p in self.pieces]) return ''.join([str(p) for p in self.pieces])