More Python2.5 fixes

This commit is contained in:
Sam Ruby 2006-09-20 21:01:30 -04:00
parent 619719a585
commit 6bf282eab6
2 changed files with 19 additions and 2 deletions

View File

@ -66,6 +66,9 @@ except:
if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
name2codepoint[name]=ord(codepoint)
# Python 2.2 support: ``basestring`` only became a builtin in Python 2.3.
# Probe the name directly instead of calling hasattr(__builtins__, ...):
# ``__builtins__`` is the builtins module inside ``__main__`` but a plain dict
# inside imported modules, where hasattr() is always False and the fallback
# would wrongly shadow the real ``basestring`` with ``str``.
try:
    basestring
except NameError:
    basestring = str
# This RE makes Beautiful Soup able to parse XML with namespaces.
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
def isString(s):
    """Convenience method that works with all 2.x versions of Python
    to determine whether or not something is stringlike.

    Returns True when ``s`` is a ``unicode`` or ``basestring`` instance.
    If either of those names is not defined by the running interpreter,
    the resulting NameError is caught and the check falls back to plain
    ``str``.
    """
    try:
        return isinstance(s, unicode) or isinstance(s, basestring)
    except NameError:
        # ``unicode`` / ``basestring`` are unavailable here; ``str`` is the
        # only string type left to test against.
        return isinstance(s, str)
@ -1285,6 +1288,12 @@ class BeautifulStoneSoup(Tag, SGMLParser):
j = i + len(toHandle)
return j
def convert_charref(self, name):
    """Return the character reference ``name`` as literal ``&#name;`` text.

    The reference is re-wrapped, not decoded, so it reaches the output
    exactly as it was written.

    NOTE(review): presumably overrides the ``SGMLParser.convert_charref``
    hook introduced in Python 2.5 -- confirm against sgmllib.
    """
    return '&#%s;' % name
def convert_entityref(self, name):
    """Return the entity reference ``name`` as literal ``&name;`` text.

    The reference is re-wrapped, not resolved, so it reaches the output
    exactly as it was written.

    NOTE(review): presumably overrides the ``SGMLParser.convert_entityref``
    hook introduced in Python 2.5 -- confirm against sgmllib.
    """
    return '&%s;' % name
class BeautifulSoup(BeautifulStoneSoup):
"""This parser knows the following facts about HTML:
@ -1655,6 +1664,8 @@ class UnicodeDammit:
'''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases'''
if not data: return u''
# strip Byte Order Mark (if present)
if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
and (data[2:4] != '\x00\x00'):

View File

@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
"""
__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs"
__version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
@ -1715,6 +1715,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
# self.updatepos(declstartpos, i)
return None, -1
def convert_charref(self, name):
    '''Return the character reference ``name`` as literal ``&#name;`` text.

    The reference is re-wrapped, not decoded, so it is passed through
    unchanged.

    NOTE(review): presumably overrides the ``sgmllib.SGMLParser``
    ``convert_charref`` hook introduced in Python 2.5 -- confirm.
    '''
    return '&#%s;' % name
def convert_entityref(self, name):
    '''Return the entity reference ``name`` as literal ``&name;`` text.

    The reference is re-wrapped, not resolved, so it is passed through
    unchanged.

    NOTE(review): presumably overrides the ``sgmllib.SGMLParser``
    ``convert_entityref`` hook introduced in Python 2.5 -- confirm.
    '''
    return '&%s;' % name
def output(self):
    '''Return processed HTML as a single string.

    Every item in ``self.pieces`` is converted with ``str()`` and the
    results are concatenated in order with no separator.
    '''
    return ''.join([str(p) for p in self.pieces])