Sam Ruby 2006-09-13 Add stop-opacity; resynch with feedparser
This commit is contained in:
commit
a0f4a4cace
@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
|
|||||||
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__version__ = "4.2-pre-" + "$Revision: 1.135 $"[11:16] + "-cvs"
|
__version__ = "4.2-pre-" + "$Revision: 1.139 $"[11:16] + "-cvs"
|
||||||
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without modification,
|
Redistribution and use in source and binary forms, with or without modification,
|
||||||
@ -569,7 +569,7 @@ class _FeedParserMixin:
|
|||||||
if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref)
|
if _debug: sys.stderr.write('entering handle_entityref with %s\n' % ref)
|
||||||
if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
|
if ref in ('lt', 'gt', 'quot', 'amp', 'apos'):
|
||||||
text = '&%s;' % ref
|
text = '&%s;' % ref
|
||||||
elif ref in self.entities:
|
elif ref in self.entities.keys():
|
||||||
text = self.entities[ref]
|
text = self.entities[ref]
|
||||||
if text.startswith('&#') and text.endswith(';'):
|
if text.startswith('&#') and text.endswith(';'):
|
||||||
return self.handle_entityref(text)
|
return self.handle_entityref(text)
|
||||||
@ -765,7 +765,7 @@ class _FeedParserMixin:
|
|||||||
|
|
||||||
# map win-1252 extensions to the proper code points
|
# map win-1252 extensions to the proper code points
|
||||||
if type(output) == type(u''):
|
if type(output) == type(u''):
|
||||||
output = u''.join([c in _cp1252 and _cp1252[c] or c for c in output])
|
output = u''.join([c in _cp1252.keys() and _cp1252[c] or c for c in output])
|
||||||
|
|
||||||
# categories/tags/keywords/whatever are handled in _end_category
|
# categories/tags/keywords/whatever are handled in _end_category
|
||||||
if element == 'category':
|
if element == 'category':
|
||||||
@ -839,7 +839,7 @@ class _FeedParserMixin:
|
|||||||
if filter(lambda e: e not in entitydefs.keys(),
|
if filter(lambda e: e not in entitydefs.keys(),
|
||||||
re.findall(r'&(\w+);',str)): return
|
re.findall(r'&(\w+);',str)): return
|
||||||
|
|
||||||
return True
|
return 1
|
||||||
|
|
||||||
def _mapToStandardPrefix(self, name):
|
def _mapToStandardPrefix(self, name):
|
||||||
colonpos = name.find(':')
|
colonpos = name.find(':')
|
||||||
@ -1649,7 +1649,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser):
|
|||||||
else:
|
else:
|
||||||
value = unichr(int(ref))
|
value = unichr(int(ref))
|
||||||
|
|
||||||
if value in _cp1252:
|
if value in _cp1252.keys():
|
||||||
self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:])
|
self.pieces.append('&#%s;' % hex(ord(_cp1252[value]))[1:])
|
||||||
else:
|
else:
|
||||||
self.pieces.append('&#%(ref)s;' % locals())
|
self.pieces.append('&#%(ref)s;' % locals())
|
||||||
@ -2284,7 +2284,7 @@ class _HTMLSanitizer(_BaseHTMLProcessor):
|
|||||||
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
|
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
|
||||||
'preserveAspectRatio', 'r', 'repeatCount', 'repeatDur',
|
'preserveAspectRatio', 'r', 'repeatCount', 'repeatDur',
|
||||||
'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx',
|
'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx',
|
||||||
'ry', 'slope', 'stemh', 'stemv', 'stop-color',
|
'ry', 'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity',
|
||||||
'strikethrough-position', 'strikethrough-thickness', 'stroke',
|
'strikethrough-position', 'strikethrough-thickness', 'stroke',
|
||||||
'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
|
'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
|
||||||
'stroke-linejoin', 'stroke-miterlimit', 'stroke-width',
|
'stroke-linejoin', 'stroke-miterlimit', 'stroke-width',
|
||||||
@ -2514,10 +2514,12 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
|
|||||||
If the etag argument is supplied, it will be used as the value of an
|
If the etag argument is supplied, it will be used as the value of an
|
||||||
If-None-Match request header.
|
If-None-Match request header.
|
||||||
|
|
||||||
If the modified argument is supplied, it must be a tuple of 9 integers
|
If the modified argument is supplied, it can be a tuple of 9 integers
|
||||||
as returned by gmtime() in the standard Python time module. This MUST
|
(as returned by gmtime() in the standard Python time module) or a date
|
||||||
be in GMT (Greenwich Mean Time). The formatted date/time will be used
|
string in any format supported by feedparser. Regardless, it MUST
|
||||||
as the value of an If-Modified-Since request header.
|
be in GMT (Greenwich Mean Time). It will be reformatted into an
|
||||||
|
RFC 1123-compliant date and used as the value of an If-Modified-Since
|
||||||
|
request header.
|
||||||
|
|
||||||
If the agent argument is supplied, it will be used as the value of a
|
If the agent argument is supplied, it will be used as the value of a
|
||||||
User-Agent request header.
|
User-Agent request header.
|
||||||
@ -2563,6 +2565,8 @@ def _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, h
|
|||||||
request.add_header('User-Agent', agent)
|
request.add_header('User-Agent', agent)
|
||||||
if etag:
|
if etag:
|
||||||
request.add_header('If-None-Match', etag)
|
request.add_header('If-None-Match', etag)
|
||||||
|
if type(modified) == type(''):
|
||||||
|
modified = _parse_date(modified)
|
||||||
if modified:
|
if modified:
|
||||||
# format into an RFC 1123-compliant timestamp. We can't use
|
# format into an RFC 1123-compliant timestamp. We can't use
|
||||||
# time.strftime() since the %a and %b directives can be affected
|
# time.strftime() since the %a and %b directives can be affected
|
||||||
@ -3414,20 +3418,86 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer
|
|||||||
result['namespaces'] = feedparser.namespacesInUse
|
result['namespaces'] = feedparser.namespacesInUse
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
class Serializer:
|
||||||
|
def __init__(self, results):
|
||||||
|
self.results = results
|
||||||
|
|
||||||
|
class TextSerializer(Serializer):
|
||||||
|
def write(self, stream=sys.stdout):
|
||||||
|
self._writer(stream, self.results, '')
|
||||||
|
|
||||||
|
def _writer(self, stream, node, prefix):
|
||||||
|
if not node: return
|
||||||
|
if hasattr(node, 'keys'):
|
||||||
|
keys = node.keys()
|
||||||
|
keys.sort()
|
||||||
|
for k in keys:
|
||||||
|
if k in ('description', 'link'): continue
|
||||||
|
if node.has_key(k + '_detail'): continue
|
||||||
|
if node.has_key(k + '_parsed'): continue
|
||||||
|
self._writer(stream, node[k], prefix + k + '.')
|
||||||
|
elif type(node) == types.ListType:
|
||||||
|
index = 0
|
||||||
|
for n in node:
|
||||||
|
self._writer(stream, n, prefix[:-1] + '[' + str(index) + '].')
|
||||||
|
index += 1
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
s = str(node).encode('utf-8')
|
||||||
|
s = s.replace('\\', '\\\\')
|
||||||
|
s = s.replace('\r', '')
|
||||||
|
s = s.replace('\n', r'\n')
|
||||||
|
stream.write(prefix[:-1])
|
||||||
|
stream.write('=')
|
||||||
|
stream.write(s)
|
||||||
|
stream.write('\n')
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
class PprintSerializer(Serializer):
|
||||||
|
def write(self, stream=sys.stdout):
|
||||||
|
stream.write(self.results['href'] + '\n\n')
|
||||||
|
from pprint import pprint
|
||||||
|
pprint(self.results, stream)
|
||||||
|
stream.write('\n')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if not sys.argv[1:]:
|
try:
|
||||||
print __doc__
|
from optparse import OptionParser
|
||||||
sys.exit(0)
|
except:
|
||||||
|
OptionParser = None
|
||||||
|
|
||||||
|
if OptionParser:
|
||||||
|
optionParser = OptionParser(version=__version__, usage="%prog [options] url_or_filename_or_-")
|
||||||
|
optionParser.set_defaults(format="pprint")
|
||||||
|
optionParser.add_option("-A", "--user-agent", dest="agent", metavar="AGENT", help="User-Agent for HTTP URLs")
|
||||||
|
optionParser.add_option("-e", "--referer", "--referrer", dest="referrer", metavar="URL", help="Referrer for HTTP URLs")
|
||||||
|
optionParser.add_option("-t", "--etag", dest="etag", metavar="TAG", help="ETag/If-None-Match for HTTP URLs")
|
||||||
|
optionParser.add_option("-m", "--last-modified", dest="modified", metavar="DATE", help="Last-modified/If-Modified-Since for HTTP URLs (any supported date format)")
|
||||||
|
optionParser.add_option("-f", "--format", dest="format", metavar="FORMAT", help="output results in FORMAT (text, pprint)")
|
||||||
|
optionParser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False, help="write debugging information to stderr")
|
||||||
|
(options, urls) = optionParser.parse_args()
|
||||||
|
if options.verbose:
|
||||||
|
_debug = 1
|
||||||
|
if not urls:
|
||||||
|
optionParser.print_help()
|
||||||
|
sys.exit(0)
|
||||||
else:
|
else:
|
||||||
|
if not sys.argv[1:]:
|
||||||
|
print __doc__
|
||||||
|
sys.exit(0)
|
||||||
|
class _Options:
|
||||||
|
etag = modified = agent = referrer = None
|
||||||
|
format = 'pprint'
|
||||||
|
options = _Options()
|
||||||
urls = sys.argv[1:]
|
urls = sys.argv[1:]
|
||||||
|
|
||||||
zopeCompatibilityHack()
|
zopeCompatibilityHack()
|
||||||
from pprint import pprint
|
|
||||||
|
serializer = globals().get(options.format.capitalize() + 'Serializer', Serializer)
|
||||||
for url in urls:
|
for url in urls:
|
||||||
print url
|
results = parse(url, etag=options.etag, modified=options.modified, agent=options.agent, referrer=options.referrer)
|
||||||
print
|
serializer(results).write(sys.stdout)
|
||||||
result = parse(url)
|
|
||||||
pprint(result)
|
|
||||||
print
|
|
||||||
|
|
||||||
#REVISION HISTORY
|
#REVISION HISTORY
|
||||||
#1.0 - 9/27/2002 - MAP - fixed namespace processing on prefixed RSS 2.0 elements,
|
#1.0 - 9/27/2002 - MAP - fixed namespace processing on prefixed RSS 2.0 elements,
|
||||||
|
Loading…
Reference in New Issue
Block a user