More Python2.5 fixes
This commit is contained in:
parent
619719a585
commit
6bf282eab6
@ -66,6 +66,9 @@ except:
|
||||
if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
|
||||
name2codepoint[name]=ord(codepoint)
|
||||
|
||||
# python 2.2 support
|
||||
if not hasattr(__builtins__, 'basestring'): basestring=str
|
||||
|
||||
# This RE makes Beautiful Soup able to parse XML with namespaces.
|
||||
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
|
||||
|
||||
@ -870,7 +873,7 @@ def isString(s):
|
||||
"""Convenience method that works with all 2.x versions of Python
|
||||
to determine whether or not something is stringlike."""
|
||||
try:
|
||||
-return isinstance(s, unicode) or isintance(s, basestring)
|
||||
+return isinstance(s, unicode) or isinstance(s, basestring)
|
||||
except NameError:
|
||||
return isinstance(s, str)
|
||||
|
||||
@ -1285,6 +1288,12 @@ class BeautifulStoneSoup(Tag, SGMLParser):
|
||||
j = i + len(toHandle)
|
||||
return j
|
||||
|
||||
def convert_charref(self, name):
|
||||
return '&#%s;' % name
|
||||
|
||||
def convert_entityref(self, name):
|
||||
return '&%s;' % name
|
||||
|
||||
class BeautifulSoup(BeautifulStoneSoup):
|
||||
|
||||
"""This parser knows the following facts about HTML:
|
||||
@ -1655,6 +1664,8 @@ class UnicodeDammit:
|
||||
'''Given a string and its encoding, decodes the string into Unicode.
|
||||
%encoding is a string recognized by encodings.aliases'''
|
||||
|
||||
if not data: return u''
|
||||
|
||||
# strip Byte Order Mark (if present)
|
||||
if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
|
||||
and (data[2:4] != '\x00\x00'):
|
||||
|
@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
|
||||
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
||||
"""
|
||||
|
||||
-__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs"
|
||||
+__version__ = "4.2-pre-" + "$Revision: 1.141 $"[11:16] + "-cvs"
|
||||
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
@ -1715,6 +1715,12 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
|
||||
# self.updatepos(declstartpos, i)
|
||||
return None, -1
|
||||
|
||||
def convert_charref(self, name):
|
||||
return '&#%s;' % name
|
||||
|
||||
def convert_entityref(self, name):
|
||||
return '&%s;' % name
|
||||
|
||||
def output(self):
|
||||
'''Return processed HTML as a single string'''
|
||||
return ''.join([str(p) for p in self.pieces])
|
||||
|
Loading…
Reference in New Issue
Block a user