Support python 2.2 through python 2.5
This commit is contained in:
parent
9c56880e8d
commit
b75ba9684e
@ -821,7 +821,8 @@ class SoupStrainer:
|
|||||||
def _matches(self, markup, matchAgainst):
|
def _matches(self, markup, matchAgainst):
|
||||||
#print "Matching %s against %s" % (markup, matchAgainst)
|
#print "Matching %s against %s" % (markup, matchAgainst)
|
||||||
result = False
|
result = False
|
||||||
if matchAgainst == True and type(matchAgainst) == types.BooleanType:
|
if matchAgainst == True and (not hasattr(types, 'BooleanType') or
|
||||||
|
type(matchAgainst) == types.BooleanType):
|
||||||
result = markup != None
|
result = markup != None
|
||||||
elif callable(matchAgainst):
|
elif callable(matchAgainst):
|
||||||
result = matchAgainst(markup)
|
result = matchAgainst(markup)
|
||||||
|
@ -67,8 +67,8 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
|
|||||||
options = {}
|
options = {}
|
||||||
|
|
||||||
# add original options
|
# add original options
|
||||||
for key, value in orig_config.items(list):
|
for key in orig_config.options(list):
|
||||||
options[key] = value
|
options[key] = orig_config.get(list, key)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if use_cache:
|
if use_cache:
|
||||||
@ -85,8 +85,9 @@ def downloadReadingList(list, orig_config, callback, use_cache=True, re_read=Tru
|
|||||||
cached_config.set(list, key, value)
|
cached_config.set(list, key, value)
|
||||||
|
|
||||||
# read list
|
# read list
|
||||||
|
curdir=getattr(os.path, 'curdir', '.')
|
||||||
if sys.platform.find('win') < 0:
|
if sys.platform.find('win') < 0:
|
||||||
base = urljoin('file:', os.path.abspath(os.path.curdir))
|
base = urljoin('file:', os.path.abspath(curdir))
|
||||||
else:
|
else:
|
||||||
path = os.path.abspath(os.path.curdir)
|
path = os.path.abspath(os.path.curdir)
|
||||||
base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
|
base = urljoin('file:///', path.replace(':','|').replace('\\','/'))
|
||||||
|
@ -130,6 +130,18 @@ try:
|
|||||||
except:
|
except:
|
||||||
chardet = None
|
chardet = None
|
||||||
|
|
||||||
|
# reversible htmlentitydefs mappings for Python 2.2
|
||||||
|
try:
|
||||||
|
from htmlentitydefs import name2codepoint, codepoint2name
|
||||||
|
except:
|
||||||
|
import htmlentitydefs
|
||||||
|
name2codepoint={}
|
||||||
|
codepoint2name={}
|
||||||
|
for (name,codepoint) in htmlentitydefs.entitydefs.iteritems():
|
||||||
|
if codepoint.startswith('&#'): codepoint=unichr(int(codepoint[2:-1]))
|
||||||
|
name2codepoint[name]=ord(codepoint)
|
||||||
|
codepoint2name[ord(codepoint)]=name
|
||||||
|
|
||||||
# BeautifulSoup parser used for parsing microformats from embedded HTML content
|
# BeautifulSoup parser used for parsing microformats from embedded HTML content
|
||||||
# http://www.crummy.com/software/BeautifulSoup/. At the moment, it appears
|
# http://www.crummy.com/software/BeautifulSoup/. At the moment, it appears
|
||||||
# that there is a version incompatibility, so the import is replaced with
|
# that there is a version incompatibility, so the import is replaced with
|
||||||
@ -574,20 +586,9 @@ class _FeedParserMixin:
|
|||||||
if text.startswith('&#') and text.endswith(';'):
|
if text.startswith('&#') and text.endswith(';'):
|
||||||
return self.handle_entityref(text)
|
return self.handle_entityref(text)
|
||||||
else:
|
else:
|
||||||
# entity resolution graciously donated by Aaron Swartz
|
try: name2codepoint[ref]
|
||||||
def name2cp(k):
|
|
||||||
import htmlentitydefs
|
|
||||||
if hasattr(htmlentitydefs, 'name2codepoint'): # requires Python 2.3
|
|
||||||
return htmlentitydefs.name2codepoint[k]
|
|
||||||
k = htmlentitydefs.entitydefs[k]
|
|
||||||
if k.startswith('&#x') and k.endswith(';'):
|
|
||||||
return int(k[3:-1],16) # not in latin-1
|
|
||||||
if k.startswith('&#') and k.endswith(';'):
|
|
||||||
return int(k[2:-1]) # not in latin-1
|
|
||||||
return ord(k)
|
|
||||||
try: name2cp(ref)
|
|
||||||
except KeyError: text = '&%s;' % ref
|
except KeyError: text = '&%s;' % ref
|
||||||
else: text = unichr(name2cp(ref)).encode('utf-8')
|
else: text = unichr(name2codepoint[ref]).encode('utf-8')
|
||||||
self.elementstack[-1][2].append(text)
|
self.elementstack[-1][2].append(text)
|
||||||
|
|
||||||
def handle_data(self, text, escape=1):
|
def handle_data(self, text, escape=1):
|
||||||
@ -672,9 +673,9 @@ class _FeedParserMixin:
|
|||||||
# only if all the remaining content is nested underneath it.
|
# only if all the remaining content is nested underneath it.
|
||||||
# This means that the divs would be retained in the following:
|
# This means that the divs would be retained in the following:
|
||||||
# <div>foo</div><div>bar</div>
|
# <div>foo</div><div>bar</div>
|
||||||
if pieces and len(pieces)>1 and not pieces[-1].strip():
|
while pieces and len(pieces)>1 and not pieces[-1].strip():
|
||||||
del pieces[-1]
|
del pieces[-1]
|
||||||
if pieces and len(pieces)>1 and not pieces[0].strip():
|
while pieces and len(pieces)>1 and not pieces[0].strip():
|
||||||
del pieces[0]
|
del pieces[0]
|
||||||
if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1]=='</div>':
|
if pieces and (pieces[0] == '<div>' or pieces[0].startswith('<div ')) and pieces[-1]=='</div>':
|
||||||
depth = 0
|
depth = 0
|
||||||
@ -1521,6 +1522,11 @@ if _XML_AVAILABLE:
|
|||||||
|
|
||||||
if prefix:
|
if prefix:
|
||||||
localname = prefix.lower() + ':' + localname
|
localname = prefix.lower() + ':' + localname
|
||||||
|
elif namespace and not qname: #Expat
|
||||||
|
for name,value in self.namespacesInUse.items():
|
||||||
|
if name and value == namespace:
|
||||||
|
localname = name + ':' + localname
|
||||||
|
break
|
||||||
if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
|
if _debug: sys.stderr.write('startElementNS: qname = %s, namespace = %s, givenprefix = %s, prefix = %s, attrs = %s, localname = %s\n' % (qname, namespace, givenprefix, prefix, attrs.items(), localname))
|
||||||
|
|
||||||
for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
|
for (namespace, attrlocalname), attrvalue in attrs._attrs.items():
|
||||||
@ -1546,6 +1552,11 @@ if _XML_AVAILABLE:
|
|||||||
prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
|
prefix = self._matchnamespaces.get(lowernamespace, givenprefix)
|
||||||
if prefix:
|
if prefix:
|
||||||
localname = prefix + ':' + localname
|
localname = prefix + ':' + localname
|
||||||
|
elif namespace and not qname: #Expat
|
||||||
|
for name,value in self.namespacesInUse.items():
|
||||||
|
if name and value == namespace:
|
||||||
|
localname = name + ':' + localname
|
||||||
|
break
|
||||||
localname = str(localname).lower()
|
localname = str(localname).lower()
|
||||||
self.unknown_endtag(localname)
|
self.unknown_endtag(localname)
|
||||||
|
|
||||||
@ -1657,8 +1668,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
|
|||||||
def handle_entityref(self, ref):
|
def handle_entityref(self, ref):
|
||||||
# called for each entity reference, e.g. for '&copy;', ref will be 'copy'
|
# called for each entity reference, e.g. for '&copy;', ref will be 'copy'
|
||||||
# Reconstruct the original entity reference.
|
# Reconstruct the original entity reference.
|
||||||
import htmlentitydefs
|
if name2codepoint.has_key(ref):
|
||||||
if not hasattr(htmlentitydefs, 'name2codepoint') or htmlentitydefs.name2codepoint.has_key(ref):
|
|
||||||
self.pieces.append('&%(ref)s;' % locals())
|
self.pieces.append('&%(ref)s;' % locals())
|
||||||
else:
|
else:
|
||||||
self.pieces.append('&%(ref)s' % locals())
|
self.pieces.append('&%(ref)s' % locals())
|
||||||
|
@ -193,8 +193,8 @@ def source(xsource, source, bozo, format):
|
|||||||
if key.startswith('planet_'):
|
if key.startswith('planet_'):
|
||||||
createTextElement(xsource, key.replace('_',':',1), value)
|
createTextElement(xsource, key.replace('_',':',1), value)
|
||||||
|
|
||||||
createTextElement(xsource, 'planet_bozo', bozo and 'true' or 'false')
|
createTextElement(xsource, 'planet:bozo', bozo and 'true' or 'false')
|
||||||
createTextElement(xsource, 'planet_format', format)
|
createTextElement(xsource, 'planet:format', format)
|
||||||
|
|
||||||
def reconstitute(feed, entry):
|
def reconstitute(feed, entry):
|
||||||
""" create an entry document from a parsed feed """
|
""" create an entry document from a parsed feed """
|
||||||
|
8
tests/data/reconstitute/source_bozo.xml
Normal file
8
tests/data/reconstitute/source_bozo.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<!--
|
||||||
|
Description: id
|
||||||
|
Expect: source.planet_bozo == 'false'
|
||||||
|
-->
|
||||||
|
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<entry/>
|
||||||
|
</feed>
|
8
tests/data/reconstitute/source_format.xml
Normal file
8
tests/data/reconstitute/source_format.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<!--
|
||||||
|
Description: id
|
||||||
|
Expect: source.planet_format == 'atom10'
|
||||||
|
-->
|
||||||
|
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<entry/>
|
||||||
|
</feed>
|
@ -81,6 +81,11 @@ try:
|
|||||||
sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)
|
sed=Popen(['sed','--version'],stdout=PIPE,stderr=PIPE)
|
||||||
sed.communicate()
|
sed.communicate()
|
||||||
if sed.returncode != 0: raise Exception
|
if sed.returncode != 0: raise Exception
|
||||||
except:
|
except Exception, expr:
|
||||||
# sed is not available
|
# sed is not available
|
||||||
del FilterTests.test_stripAd_yahoo
|
del FilterTests.test_stripAd_yahoo
|
||||||
|
|
||||||
|
if isinstance(expr, ImportError):
|
||||||
|
# Popen is not available
|
||||||
|
for method in dir(FilterTests):
|
||||||
|
if method.startswith('test_'): delattr(FilterTests,method)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user