Latest from Sam.

commit 215777b9ee
@@ -95,6 +95,13 @@ attributes on these elements.</li>
<li>Anything else (i.e., the default) will leave the date as is, causing the entries that contain these dates to sort to the top of the planet until the time passes.</li>
</ul>
</li>
<li><code>xml_base</code> will adjust the <code>xml:base</code> values in effect for each of the text constructs in the feed (things like <code>title</code>, <code>summary</code>, and <code>content</code>). Other elements in the feed (most notably, <code>link</code>) are not affected by this value.
<ul style="margin:0">
<li><code>feed_alternate</code> will replace the <code>xml:base</code> in effect with the value of the <code>alternate</code> <code>link</code> found either in the enclosed <code>source</code> or enclosing <code>feed</code> element.</li>
<li><code>entry_alternate</code> will replace the <code>xml:base</code> in effect with the value of the <code>alternate</code> <code>link</code> found in this entry.</li>
<li>Any other value will be treated as a <a href="http://www.ietf.org/rfc/rfc3986.txt">URI reference</a>. These values may be relative or absolute. If relative, the <code>xml:base</code> values in each text construct will each be adjusted separately using the specified value.</li>
</ul>
</li>
</ul>
</body>
</html>
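A hedged illustration of the documented option, borrowing the pattern the test suite in this commit uses (the 'testfeed' section name is whatever section names the subscription, and is assumed to already exist in the loaded configuration):

    from planet import config
    config.parser.set('testfeed', 'xml_base', 'feed_alternate')
    # every text construct in that feed now has its xml:base replaced by
    # the alternate link of the enclosing feed before relative references
    # are resolved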
@@ -30,5 +30,7 @@ def getLogger(level, format):

    return logger

# Configure feed parser
from planet import feedparser
feedparser.SANITIZE_HTML=0
feedparser.RESOLVE_RELATIVE_URIS=0
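A brief sketch of what flipping these module-level switches off means downstream (the feed URL is hypothetical): feedparser hands markup back untouched, and the scrub stage decides per feed how to resolve and sanitize it.

    from planet import feedparser
    feedparser.SANITIZE_HTML = 0
    feedparser.RESOLVE_RELATIVE_URIS = 0

    # content arrives exactly as published; scrub applies the per-feed
    # xml_base policy and sanitization later in the pipeline
    data = feedparser.parse('http://example.com/feed.atom')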
@@ -125,6 +125,7 @@ def __init__():
    define_tmpl('summary_type', '')
    define_tmpl('content_type', '')
    define_tmpl('future_dates', 'keep')
    define_tmpl('xml_base', '')

def load(config_file):
    """ initialize and load a configuration"""
@@ -11,7 +11,7 @@ Recommended: Python 2.3 or later
Recommended: CJKCodecs and iconv_codec <http://cjkpython.i18n.org/>
"""

__version__ = "4.2-pre-" + "$Revision: 1.147 $"[11:16] + "-cvs"
__version__ = "4.2-pre-" + "$Revision: 1.149 $"[11:16] + "-cvs"
__license__ = """Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.

Redistribution and use in source and binary forms, with or without modification,

@@ -65,6 +65,14 @@ TIDY_MARKUP = 0
# if TIDY_MARKUP = 1
PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"]

# If you want feedparser to automatically resolve all relative URIs, set this
# to 1.
RESOLVE_RELATIVE_URIS = 1

# If you want feedparser to automatically sanitize all potentially unsafe
# HTML content, set this to 1.
SANITIZE_HTML = 1

# ---------- required modules (should come with any Python distribution) ----------
import sgmllib, re, sys, copy, urlparse, time, rfc822, types, cgi, urllib, urllib2
try:
@@ -732,7 +740,7 @@ class _FeedParserMixin:

        is_htmlish = self.mapContentType(self.contentparams.get('type', 'text/html')) in self.html_types
        # resolve relative URIs within embedded markup
        if is_htmlish:
        if is_htmlish and RESOLVE_RELATIVE_URIS:
            if element in self.can_contain_relative_uris:
                output = _resolveRelativeURIs(output, self.baseuri, self.encoding, self.contentparams.get('type', 'text/html'))

@@ -753,7 +761,7 @@ class _FeedParserMixin:
            self._getContext()['vcard'] = vcard

        # sanitize embedded markup
        if is_htmlish:
        if is_htmlish and SANITIZE_HTML:
            if element in self.can_contain_dangerous_markup:
                output = _sanitizeHTML(output, self.encoding, self.contentparams.get('type', 'text/html'))
@@ -1,3 +1,4 @@

# Differences from the current specification (23 December 2006) are as follows:
# * Phases and insertion modes are one concept in parser.py.
# * EOF handling is slightly different to make sure <html>, <head> and <body>

@@ -553,6 +554,10 @@ class InBodyPhase(Phase):
    # the crazy mode
    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        # Keep a ref to this for special handling of whitespace in <pre>
        self.processSpaceCharactersNonPre = self.processSpaceCharacters

        self.startTagHandler = utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("script", "style"), self.startTagScriptStyle),

@@ -622,6 +627,15 @@ class InBodyPhase(Phase):
                                  self.tree.openElements[-1])

    # the real deal
    def processSpaceCharactersPre(self, data):
        # Sometimes (start of <pre> blocks) we want to drop leading newlines
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
        if (data.startswith("\n") and self.tree.openElements[-1].name == "pre"
            and not self.tree.openElements[-1].hasContent()):
            data = data[1:]
        if data:
            self.tree.insertText(data)

    def processCharacters(self, data):
        # XXX The specification says to do this for every character at the
        # moment, but apparently that doesn't match the real world so we don't

@@ -651,6 +665,8 @@ class InBodyPhase(Phase):
        if self.tree.elementInScope("p"):
            self.endTagP("p")
        self.tree.insertElement(name, attributes)
        if name == "pre":
            self.processSpaceCharacters = self.processSpaceCharactersPre

    def startTagForm(self, name, attributes):
        if self.tree.formPointer:

@@ -849,6 +865,9 @@ class InBodyPhase(Phase):
            self.parser.phase.processEndTag(name)

    def endTagBlock(self, name):
        # Put us back in the right whitespace handling mode
        if name == "pre":
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
        inScope = self.tree.elementInScope(name)
        if inScope:
            self.tree.generateImpliedEndTags()
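A hedged sketch of the behaviour these hunks implement; the import path and the tree= keyword are assumptions about this bundled html5lib, not confirmed by the diff:

    from planet.html5lib import html5parser
    from planet.html5lib.treebuilders import dom

    p = html5parser.HTMLParser(tree=dom.TreeBuilder)   # assumed constructor
    doc = p.parse("<pre>\nfirst line\nsecond line</pre>")
    # only the newline immediately after <pre> is dropped; the one between
    # the two lines of text survives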
@@ -11,11 +11,6 @@ References:
 * http://wiki.whatwg.org/wiki/HtmlVsXhtml

@@TODO:
 * Produce SAX events based on the produced DOM. This is intended not to
   support streaming, but rather to support application level compatibility.
 * Optional namespace support
 * Investigate the use of <![CDATA[]]> when tokenizer.contentModelFlag
   indicates CDATA processing to ensure dual HTML/XHTML compatibility.
 * Selectively lowercase only XHTML, but not foreign markup
"""

@@ -50,6 +45,13 @@ class XMLParser(html5parser.HTMLParser):
            if token["data"]:
                self.parseError(_("End tag contains unexpected attributes."))

        elif token["type"] == "Comment":
            # Rescue CDATA from the comments
            if (token["data"].startswith("[CDATA[") and
                token["data"].endswith("]]")):
                token["type"] = "Characters"
                token["data"] = token["data"][7:-2]

        return token

class XHTMLParser(XMLParser):
@@ -1,5 +1,6 @@
import _base
from xml.dom import minidom, Node
from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
import new

import re
illegal_xml_chars = re.compile("[\x01-\x08\x0B\x0C\x0E-\x1F]")

@@ -71,6 +72,10 @@ class NodeBuilder(_base.Node):
class TreeBuilder(_base.TreeBuilder):
    def documentClass(self):
        self.dom = minidom.getDOMImplementation().createDocument(None,None,None)
        def hilite(self, encoding):
            print 'foo'
        method = new.instancemethod(hilite, self.dom, self.dom.__class__)
        setattr(self.dom, 'hilite', method)
        return self

    def doctypeClass(self,name):

@@ -129,3 +134,58 @@ def testSerializer(element):
    serializeElement(element, 0)

    return "\n".join(rv)

def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
    if node.nodeType == Node.ELEMENT_NODE:
        if not nsmap:
            handler.startElement(node.nodeName, node.attributes)
            for child in node.childNodes: dom2sax(child, handler, nsmap)
            handler.endElement(node.nodeName)
        else:
            attributes = dict(node.attributes.itemsNS())

            # gather namespace declarations
            prefixes = []
            for attrname in node.attributes.keys():
                attr = node.getAttributeNode(attrname)
                if (attr.namespaceURI == XMLNS_NAMESPACE or
                    (attr.namespaceURI == None and attr.nodeName.startswith('xmlns'))):
                    prefix = (attr.localName != 'xmlns' and attr.localName or None)
                    handler.startPrefixMapping(prefix, attr.nodeValue)
                    prefixes.append(prefix)
                    nsmap = nsmap.copy()
                    nsmap[prefix] = attr.nodeValue
                    del attributes[(attr.namespaceURI, attr.localName)]

            # apply namespace declarations
            for attrname in node.attributes.keys():
                attr = node.getAttributeNode(attrname)
                if attr.namespaceURI == None and ':' in attr.nodeName:
                    prefix = attr.nodeName.split(':')[0]
                    if nsmap.has_key(prefix):
                        del attributes[(attr.namespaceURI, attr.localName)]
                        attributes[(nsmap[prefix],attr.localName)]=attr.nodeValue

            # SAX events
            ns = node.namespaceURI or nsmap.get(None,None)
            handler.startElementNS((ns,node.nodeName), node.nodeName, attributes)
            for child in node.childNodes: dom2sax(child, handler, nsmap)
            handler.endElementNS((ns, node.nodeName), node.nodeName)
            for prefix in prefixes: handler.endPrefixMapping(prefix)

    elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
        handler.characters(node.nodeValue)

    elif node.nodeType == Node.DOCUMENT_NODE:
        handler.startDocument()
        for child in node.childNodes: dom2sax(child, handler, nsmap)
        handler.endDocument()

    else:
        # ATTRIBUTE_NODE
        # ENTITY_NODE
        # PROCESSING_INSTRUCTION_NODE
        # COMMENT_NODE
        # DOCUMENT_TYPE_NODE
        # NOTATION_NODE
        pass
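Since dom2sax is new here, a minimal usage sketch may help; the handler below is hypothetical and relies on xml.sax's ContentHandler supplying no-op defaults for the namespace events:

    from xml.dom import minidom
    from xml.sax.handler import ContentHandler

    class TextDump(ContentHandler):
        def characters(self, data):
            print data

    doc = minidom.parseString(
        "<entry xmlns='http://www.w3.org/2005/Atom'><title>hello</title></entry>")
    dom2sax(doc, TextDump())   # prints: hello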
@@ -1,208 +1,5 @@
try:
    from xml.etree import ElementTree
except ImportError:
    from elementtree import ElementTree

import _base

class Element(_base.Node):
    def __init__(self, name):
        self._element = ElementTree.Element(name)
        self.name = name
        self.parent = None
        self._childNodes = []
        self._flags = []

        # Set the element text and tail to the empty string rather than None
        # XXX - is this desirable or should we do it on a case by case basis?
        self._element.text = ""
        self._element.tail = ""

    def _setName(self, name):
        self._element.tag = name

    def _getName(self):
        return self._element.tag

    name = property(_getName, _setName)

    def _getAttributes(self):
        return self._element.attrib

    def _setAttributes(self, attributes):
        # Delete existing attributes first
        # XXX - there may be a better way to do this...
        for key in self._element.attrib.keys():
            del self._element.attrib[key]
        for key, value in attributes.iteritems():
            self._element.set(key, value)

    attributes = property(_getAttributes, _setAttributes)

    def _getChildNodes(self):
        return self._childNodes

    def _setChildNodes(self, value):
        del self._element[:]
        self._childNodes = []
        for element in value:
            self.insertChild(element)

    childNodes = property(_getChildNodes, _setChildNodes)

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self._element.text or self._element.getchildren())

    def appendChild(self, node):
        self._childNodes.append(node)
        self._element.append(node._element)
        node.parent = self

    def insertBefore(self, node, refNode):
        index = self._element.getchildren().index(refNode._element)
        self._element.insert(index, node._element)
        node.parent = self

    def removeChild(self, node):
        self._element.remove(node._element)
        node.parent = None

    def insertText(self, data, insertBefore=None):
        if not(len(self._element)):
            self._element.text += data
        elif insertBefore is None:
            # Insert the text as the tail of the last child element
            self._element[-1].tail += data
        else:
            # Insert the text before the specified node
            children = self._element.getchildren()
            index = children.index(insertBefore._element)
            if index > 0:
                self._element[index-1].tail += data
            else:
                self._element.text += data

    def cloneNode(self):
        element = Element(self.name)
        element.attributes = self.attributes
        return element

    def reparentChildren(self, newParent):
        if newParent.childNodes:
            newParent.childNodes[-1]._element.tail += self._element.text
        else:
            newParent._element.text += self._element.text
        self._element.text = ""
        _base.Node.reparentChildren(self, newParent)

class Comment(Element):
    def __init__(self, data):
        Element.__init__(self, Comment)
        self._element.text = data

    def _getData(self):
        return self._element.text

    def _setData(self, value):
        self._element.text = value

    data = property(_getData, _setData)

class DocumentType(Element):
    def __init__(self, name):
        Element.__init__(self, DocumentType)
        self._element.text = name

class Document(Element):
    def __init__(self):
        Element.__init__(self, Document)

def testSerializer(element):
    rv = []
    finalText = None
    def serializeElement(element, indent=0):
        if element.tag is DocumentType:
            rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.text))
        elif element.tag is Document:
            rv.append("#document")
            if element.text:
                rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
            if element.tail:
                finalText = element.tail
        elif element.tag is Comment:
            rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
        else:
            rv.append("|%s<%s>"%(' '*indent, element.tag))
            if hasattr(element, "attrib"):
                for name, value in element.attrib.iteritems():
                    rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
        if element.text:
            rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
        indent += 2
        for child in element.getchildren():
            serializeElement(child, indent)
        if element.tail:
            rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
    serializeElement(element, 0)

    if finalText is not None:
        rv.append("|%s\"%s\""%(' '*2, finalText))

    return "\n".join(rv)

def tostring(element):
    """Serialize an element and its child nodes to a string"""
    rv = []
    finalText = None
    def serializeElement(element):
        if element.tag is DocumentType:
            rv.append("<!DOCTYPE %s>"%(element.text,))
        elif element.tag is Document:
            if element.text:
                rv.append(element.text)
            if element.tail:
                finalText = element.tail

            for child in element.getchildren():
                serializeElement(child)

        elif element.tag is Comment:
            rv.append("<!--%s-->"%(element.text,))
        else:
            # This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>"%(element.tag,))
            else:
                attr = " ".join(["%s=\"%s\""%(name, value)
                                 for name, value in element.attrib.iteritems()])
                rv.append("<%s %s>"%(element.tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element.getchildren():
                serializeElement(child)

            rv.append("</%s>"%(element.tag,))

        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    if finalText is not None:
        rv.append("%s\""%(' '*2, finalText))

    return "".join(rv)

class TreeBuilder(_base.TreeBuilder):
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = Comment

    def testSerializer(self, element):
        return testSerializer(element)

import etreefull

class TreeBuilder(etreefull.TreeBuilder):
    def getDocument(self):
        return self.document._element
        return self.document._element.find("html")
planet/html5lib/treebuilders/etreefull.py (new file, 216 lines)
@@ -0,0 +1,216 @@
try:
    from xml.etree import ElementTree
except ImportError:
    from elementtree import ElementTree

import _base

class Element(_base.Node):
    def __init__(self, name):
        self._element = ElementTree.Element(name)
        self.name = name
        self.parent = None
        self._childNodes = []
        self._flags = []

    def _setName(self, name):
        self._element.tag = name

    def _getName(self):
        return self._element.tag

    name = property(_getName, _setName)

    def _getAttributes(self):
        return self._element.attrib

    def _setAttributes(self, attributes):
        # Delete existing attributes first
        # XXX - there may be a better way to do this...
        for key in self._element.attrib.keys():
            del self._element.attrib[key]
        for key, value in attributes.iteritems():
            self._element.set(key, value)

    attributes = property(_getAttributes, _setAttributes)

    def _getChildNodes(self):
        return self._childNodes

    def _setChildNodes(self, value):
        del self._element[:]
        self._childNodes = []
        for element in value:
            self.insertChild(element)

    childNodes = property(_getChildNodes, _setChildNodes)

    def hasContent(self):
        """Return true if the node has children or text"""
        return bool(self._element.text or self._element.getchildren())

    def appendChild(self, node):
        self._childNodes.append(node)
        self._element.append(node._element)
        node.parent = self

    def insertBefore(self, node, refNode):
        index = self._element.getchildren().index(refNode._element)
        self._element.insert(index, node._element)
        node.parent = self

    def removeChild(self, node):
        self._element.remove(node._element)
        node.parent = None

    def insertText(self, data, insertBefore=None):
        if not(len(self._element)):
            if not self._element.text:
                self._element.text = ""
            self._element.text += data
        elif insertBefore is None:
            # Insert the text as the tail of the last child element
            if not self._element[-1].tail:
                self._element[-1].tail = ""
            self._element[-1].tail += data
        else:
            # Insert the text before the specified node
            children = self._element.getchildren()
            index = children.index(insertBefore._element)
            if index > 0:
                if not self._element[index-1].tail:
                    self._element[index-1].tail = ""
                self._element[index-1].tail += data
            else:
                if not self._element.text:
                    self._element.text = ""
                self._element.text += data

    def cloneNode(self):
        element = Element(self.name)
        element.attributes = self.attributes
        return element

    def reparentChildren(self, newParent):
        if newParent.childNodes:
            newParent.childNodes[-1]._element.tail += self._element.text
        else:
            if not newParent._element.text:
                newParent._element.text = ""
            if self._element.text is not None:
                newParent._element.text += self._element.text
        self._element.text = ""
        _base.Node.reparentChildren(self, newParent)

class Comment(Element):
    def __init__(self, data):
        # Use the superclass constructor to set all properties on the
        # wrapper element
        Element.__init__(self, None)
        self._element = ElementTree.Comment(data)

    def _getData(self):
        return self._element.text

    def _setData(self, value):
        self._element.text = value

    data = property(_getData, _setData)

class DocumentType(Element):
    def __init__(self, name):
        Element.__init__(self, DocumentType)
        self._element.text = name

class Document(Element):
    def __init__(self):
        Element.__init__(self, Document)

def testSerializer(element):
    rv = []
    finalText = None
    def serializeElement(element, indent=0):
        if element.tag is DocumentType:
            rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.text))
        elif element.tag is Document:
            rv.append("#document")
            if element.text:
                rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
            if element.tail:
                finalText = element.tail
        elif element.tag is ElementTree.Comment:
            rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
        else:
            rv.append("|%s<%s>"%(' '*indent, element.tag))
            if hasattr(element, "attrib"):
                for name, value in element.attrib.iteritems():
                    rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
        if element.text:
            rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
        indent += 2
        for child in element.getchildren():
            serializeElement(child, indent)
        if element.tail:
            rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
    serializeElement(element, 0)

    if finalText is not None:
        rv.append("|%s\"%s\""%(' '*2, finalText))

    return "\n".join(rv)

def tostring(element):
    """Serialize an element and its child nodes to a string"""
    rv = []
    finalText = None
    def serializeElement(element):
        if element.tag is DocumentType:
            rv.append("<!DOCTYPE %s>"%(element.text,))
        elif element.tag is Document:
            if element.text:
                rv.append(element.text)
            if element.tail:
                finalText = element.tail

            for child in element.getchildren():
                serializeElement(child)

        elif element.tag is ElementTree.Comment:
            rv.append("<!--%s-->"%(element.text,))
        else:
            # This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>"%(element.tag,))
            else:
                attr = " ".join(["%s=\"%s\""%(name, value)
                                 for name, value in element.attrib.iteritems()])
                rv.append("<%s %s>"%(element.tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element.getchildren():
                serializeElement(child)

            rv.append("</%s>"%(element.tag,))

        if element.tail:
            rv.append(element.tail)

    serializeElement(element)

    if finalText is not None:
        rv.append("%s\""%(' '*2, finalText))

    return "".join(rv)

class TreeBuilder(_base.TreeBuilder):
    documentClass = Document
    doctypeClass = DocumentType
    elementClass = Element
    commentClass = Comment

    def testSerializer(self, element):
        return testSerializer(element)

    def getDocument(self):
        return self.document._element
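A short sketch of what the None-guards in insertText and reparentChildren buy relative to the etree module this file generalizes: ElementTree initializes text and tail to None, so an unguarded += would raise TypeError.

    e = Element('p')
    e.insertText('hello')     # text was None; becomes 'hello'
    b = Element('b')
    e.appendChild(b)
    e.insertText(' world')    # appended as the tail of <b>
    assert e._element.text == 'hello'
    assert e._element[-1].tail == ' world'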
@@ -16,11 +16,13 @@ __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)",
    "Xavier Verges Farrero",
    "Jonathan Feinberg",
    "Blair Zajac",
    "Sam Ruby"]
    "Sam Ruby",
    "Louis Nyffenegger"]
__license__ = "MIT"
__version__ = "$Rev: 217 $"
__version__ = "$Rev: 227 $"

import re
import sys
import md5
import email
import email.Utils

@@ -41,6 +43,12 @@ import hmac
from gettext import gettext as _
from socket import gaierror

if sys.version_info >= (2,3):
    from iri2uri import iri2uri
else:
    def iri2uri(uri):
        return uri

__all__ = ['Http', 'Response', 'HttpLib2Error',
  'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent',
  'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError',

@@ -51,7 +59,7 @@ __all__ = ['Http', 'Response', 'HttpLib2Error',
debuglevel = 0

# Python 2.3 support
if 'sorted' not in __builtins__:
if sys.version_info < (2,4):
    def sorted(seq):
        seq.sort()
        return seq

@@ -60,7 +68,6 @@ if 'sorted' not in __builtins__:
def HTTPResponse__getheaders(self):
    """Return list of (header, value) tuples."""
    if self.msg is None:
        print "================================"
        raise httplib.ResponseNotReady()
    return self.msg.items()

@@ -75,6 +82,8 @@ class RedirectLimit(HttpLib2Error): pass
class FailedToDecompressContent(HttpLib2Error): pass
class UnimplementedDigestAuthOptionError(HttpLib2Error): pass
class UnimplementedHmacDigestAuthOptionError(HttpLib2Error): pass
class RelativeURIError(HttpLib2Error): pass
class ServerNotFoundError(HttpLib2Error): pass

# Open Items:
# -----------

@@ -118,6 +127,8 @@ def parse_uri(uri):

def urlnorm(uri):
    (scheme, authority, path, query, fragment) = parse_uri(uri)
    if not scheme or not authority:
        raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
    authority = authority.lower()
    scheme = scheme.lower()
    if not path:

@@ -125,6 +136,7 @@ def urlnorm(uri):
    # Could do syntax based normalization of the URI before
    # computing the digest. See Section 6.2.2 of Std 66.
    request_uri = query and "?".join([path, query]) or path
    scheme = scheme.lower()
    defrag_uri = scheme + "://" + authority + request_uri
    return scheme, authority, request_uri, defrag_uri
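A hedged sketch of the new guard (the relative path is made up): urlnorm now fails fast on relative references instead of silently producing a bogus cache key.

    from planet import httplib2

    try:
        httplib2.urlnorm('/relative/path')
    except httplib2.RelativeURIError, e:
        print e   # Only absolute URIs are allowed. uri = /relative/path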
@@ -143,9 +155,10 @@ def safename(filename):
    try:
        if re_url_scheme.match(filename):
            if isinstance(filename,str):
                filename=filename.decode('utf-8').encode('idna')
                filename = filename.decode('utf-8')
                filename = filename.encode('idna')
            else:
                filename=filename.encode('idna')
                filename = filename.encode('idna')
    except:
        pass
    if isinstance(filename,unicode):

@@ -260,16 +273,26 @@ def _entry_disposition(response_headers, request_headers):
        now = time.time()
        current_age = max(0, now - date)
        if cc_response.has_key('max-age'):
            freshness_lifetime = int(cc_response['max-age'])
            try:
                freshness_lifetime = int(cc_response['max-age'])
            except:
                freshness_lifetime = 0
        elif response_headers.has_key('expires'):
            expires = email.Utils.parsedate_tz(response_headers['expires'])
            freshness_lifetime = max(0, calendar.timegm(expires) - date)
        else:
            freshness_lifetime = 0
        if cc.has_key('max-age'):
            freshness_lifetime = min(freshness_lifetime, int(cc['max-age']))
            try:
                freshness_lifetime = int(cc['max-age'])
            except:
                freshness_lifetime = 0
        if cc.has_key('min-fresh'):
            current_age += int(cc['min-fresh'])
            try:
                min_fresh = int(cc['min-fresh'])
            except:
                min_fresh = 0
            current_age += min_fresh
        if freshness_lifetime > current_age:
            retval = "FRESH"
    return retval
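A hedged illustration of the hardened header parsing (the header values are made up): a malformed max-age now degrades to a freshness lifetime of 0 instead of raising ValueError, so the cache entry is simply treated as stale.

    import time, email.Utils
    from planet import httplib2

    response_headers = {
        'date': email.Utils.formatdate(time.time()),
        'cache-control': 'max-age=not-a-number',
    }
    # int('not-a-number') used to blow up; now this prints "STALE"
    print httplib2._entry_disposition(response_headers, {})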
@@ -418,13 +441,13 @@ class DigestAuthentication(Authentication):

    def response(self, response, content):
        if not response.has_key('authentication-info'):
            challenge = _parse_www_authenticate(response, 'www-authenticate')['digest']
            challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {})
            if 'true' == challenge.get('stale'):
                self.challenge['nonce'] = challenge['nonce']
                self.challenge['nc'] = 1
                return True
        else:
            updated_challenge = _parse_www_authenticate(response, 'authentication-info')['digest']
            updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {})

            if updated_challenge.has_key('nextnonce'):
                self.challenge['nonce'] = updated_challenge['nextnonce']

@@ -440,7 +463,6 @@ class HmacDigestAuthentication(Authentication):
        Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
        challenge = _parse_www_authenticate(response, 'www-authenticate')
        self.challenge = challenge['hmacdigest']
        print self.challenge
        # TODO: self.challenge['domain']
        self.challenge['reason'] = self.challenge.get('reason', 'unauthorized')
        if self.challenge['reason'] not in ['unauthorized', 'integrity']:

@@ -466,9 +488,6 @@ class HmacDigestAuthentication(Authentication):
            self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(),
            ":", self.challenge['realm']
        ])
        print response['www-authenticate']
        print "".join([self.credentials[1], self.challenge['salt']])
        print "key_str = %s" % self.key
        self.key = self.pwhashmod.new(self.key).hexdigest().lower()

    def request(self, method, request_uri, headers, content):

@@ -479,8 +498,6 @@ class HmacDigestAuthentication(Authentication):
        created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime())
        cnonce = _cnonce()
        request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val)
        print "key = %s" % self.key
        print "msg = %s" % request_digest
        request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
        headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % (
            self.credentials[0],

@@ -641,6 +658,8 @@ class Http:
        try:
            conn.request(method, request_uri, body, headers)
            response = conn.getresponse()
        except gaierror:
            raise ServerNotFoundError("Unable to find the server at %s" % request_uri)
        except:
            if i == 0:
                conn.close()

@@ -752,6 +771,8 @@ a string that contains the response entity body.
        if not headers.has_key('user-agent'):
            headers['user-agent'] = "Python-httplib2/%s" % __version__

        uri = iri2uri(uri)

        (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)

        if not self.connections.has_key(scheme+":"+authority):

@@ -780,7 +801,7 @@ a string that contains the response entity body.
        else:
            cachekey = None

        if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag:
        if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers:
            # http://www.w3.org/1999/04/Editing/
            headers['if-match'] = info['etag']

@@ -815,9 +836,9 @@ a string that contains the response entity body.
            return (response, content)

        if entry_disposition == "STALE":
            if info.has_key('etag') and not self.ignore_etag:
            if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers:
                headers['if-none-match'] = info['etag']
            if info.has_key('last-modified'):
            if info.has_key('last-modified') and not 'last-modified' in headers:
                headers['if-modified-since'] = info['last-modified']
        elif entry_disposition == "TRANSPARENT":
            pass
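A hedged sketch of the caller-override behaviour added above (URL and validator are hypothetical): a request that already carries its own conditional header is no longer clobbered by the cached one.

    from planet import httplib2

    h = httplib2.Http('.cache')
    # the explicit If-None-Match below now wins over the cached ETag
    response, content = h.request('http://example.com/feed.atom',
        headers={'if-none-match': '"abc123"'})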
planet/httplib2/iri2uri.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""
iri2uri

Converts an IRI to a URI.

"""
__author__ = "Joe Gregorio (joe@bitworking.org)"
__copyright__ = "Copyright 2006, Joe Gregorio"
__contributors__ = []
__version__ = "1.0.0"
__license__ = "MIT"
__history__ = """
"""

import urlparse


# Convert an IRI to a URI following the rules in RFC 3987
#
# The characters we need to encode and escape are defined in the spec:
#
# iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
#         / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
#         / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
#         / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
#         / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
#         / %xD0000-DFFFD / %xE1000-EFFFD

escape_range = [
    (0xA0, 0xD7FF),
    (0xE000, 0xF8FF),
    (0xF900, 0xFDCF),
    (0xFDF0, 0xFFEF),
    (0x10000, 0x1FFFD),
    (0x20000, 0x2FFFD),
    (0x30000, 0x3FFFD),
    (0x40000, 0x4FFFD),
    (0x50000, 0x5FFFD),
    (0x60000, 0x6FFFD),
    (0x70000, 0x7FFFD),
    (0x80000, 0x8FFFD),
    (0x90000, 0x9FFFD),
    (0xA0000, 0xAFFFD),
    (0xB0000, 0xBFFFD),
    (0xC0000, 0xCFFFD),
    (0xD0000, 0xDFFFD),
    (0xE1000, 0xEFFFD),
    (0xF0000, 0xFFFFD),
    (0x100000, 0x10FFFD)
]

def encode(c):
    retval = c
    i = ord(c)
    for low, high in escape_range:
        if i < low:
            break
        if i >= low and i <= high:
            retval = "".join(["%%%2X" % ord(o) for o in c.encode('utf-8')])
            break
    return retval


def iri2uri(uri):
    """Convert an IRI to a URI. Note that IRIs must be
    passed in as unicode strings. That is, do not utf-8 encode
    the IRI before passing it into the function."""
    if isinstance(uri, unicode):
        (scheme, authority, path, query, fragment) = urlparse.urlsplit(uri)
        authority = authority.encode('idna')
        # For each character in 'ucschar' or 'iprivate'
        #  1. encode as utf-8
        #  2. then %-encode each octet of that utf-8
        uri = urlparse.urlunsplit((scheme, authority, path, query, fragment))
        uri = "".join([encode(c) for c in uri])
    return uri

if __name__ == "__main__":
    import unittest

    class Test(unittest.TestCase):

        def test_uris(self):
            """Test that URIs are invariant under the transformation."""
            invariant = [
                u"ftp://ftp.is.co.za/rfc/rfc1808.txt",
                u"http://www.ietf.org/rfc/rfc2396.txt",
                u"ldap://[2001:db8::7]/c=GB?objectClass?one",
                u"mailto:John.Doe@example.com",
                u"news:comp.infosystems.www.servers.unix",
                u"tel:+1-816-555-1212",
                u"telnet://192.0.2.16:80/",
                u"urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ]
            for uri in invariant:
                self.assertEqual(uri, iri2uri(uri))

        def test_iri(self):
            """Test that the right type of escaping is done for each part of the URI."""
            self.assertEqual("http://xn--o3h.com/%E2%98%84", iri2uri(u"http://\N{COMET}.com/\N{COMET}"))
            self.assertEqual("http://bitworking.org/?fred=%E2%98%84", iri2uri(u"http://bitworking.org/?fred=\N{COMET}"))
            self.assertEqual("http://bitworking.org/#%E2%98%84", iri2uri(u"http://bitworking.org/#\N{COMET}"))
            self.assertEqual("#%E2%98%84", iri2uri(u"#\N{COMET}"))
            self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}"))
            self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}")))
            self.assertNotEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}".encode('utf-8')))

    unittest.main()
@@ -6,6 +6,7 @@ Process a set of configuration defined sanitations on a given feed.
import time
# Planet modules
import planet, config, shell
from planet import feedparser

type_map = {'text': 'text/plain', 'html': 'text/html',
    'xhtml': 'application/xhtml+xml'}

@@ -92,3 +93,40 @@ def scrub(feed_uri, data):
            or entry['published_parsed'] <= now) and
            (not entry.has_key('updated_parsed') or not entry['updated_parsed']
            or entry['updated_parsed'] <= now)]

    scrub_xmlbase = config.xml_base(feed_uri)

    # resolve relative URIs and sanitize
    for entry in data.entries + [data.feed]:
        for key in entry.keys():
            if key == 'content':
                node = entry.content[0]
            elif key.endswith('_detail'):
                node = entry[key]
            else:
                continue

            if not node.has_key('type'): continue
            if not 'html' in node['type']: continue
            if not node.has_key('value'): continue

            if node.has_key('base'):
                if scrub_xmlbase:
                    if scrub_xmlbase == 'feed_alternate':
                        if entry.has_key('source') and \
                            entry.source.has_key('link'):
                            node['base'] = entry.source.link
                        elif data.feed.has_key('link'):
                            node['base'] = data.feed.link
                    elif scrub_xmlbase == 'entry_alternate':
                        if entry.has_key('link'):
                            node['base'] = entry.link
                    else:
                        node['base'] = feedparser._urljoin(
                            node['base'], scrub_xmlbase)

                node['value'] = feedparser._resolveRelativeURIs(
                    node.value, node.base, 'utf-8', node.type)

            node['value'] = feedparser._sanitizeHTML(
                node.value, 'utf-8', node.type)
@@ -254,7 +254,6 @@ def writeCache(feed_uri, feed_info, data):

def httpThread(thread_index, input_queue, output_queue, log):
    import httplib2, md5
    from socket import gaierror, error
    from httplib import BadStatusLine

    h = httplib2.Http(config.http_cache_directory())

@@ -304,13 +303,12 @@ def httpThread(thread_index, input_queue, output_queue, log):
                if resp.has_key('content-encoding'):
                    del resp['content-encoding']
                setattr(feed, 'headers', resp)
            except gaierror:
                log.error("Fail to resolve server name %s via %d",
                    uri, thread_index)
            except BadStatusLine:
                log.error("Bad Status Line received for %s via %d",
                    uri, thread_index)
            except error, e:
            except httplib2.HttpLib2Error, e:
                log.error("HttpLib2Error: %s via %d", str(e), thread_index)
            except socket.error, e:
                if e.__class__.__name__.lower()=='timeout':
                    feed.headers['status'] = '408'
                    log.warn("Timeout in thread-%d", thread_index)
@@ -3,6 +3,7 @@

import unittest, os, sys, glob, new, re, StringIO, time
from planet import feedparser
from planet.reconstitute import reconstitute
from planet.scrub import scrub

testfiles = 'tests/data/reconstitute/%s.xml'

@@ -23,6 +24,7 @@ class ReconstituteTest(unittest.TestCase):
        # parse and reconstitute to a string
        work = StringIO.StringIO()
        results = feedparser.parse(data)
        scrub(testfiles%name, results)
        reconstitute(results, results.entries[0]).writexml(work)

        # verify the results
@@ -6,7 +6,7 @@ from planet.scrub import scrub
from planet import feedparser, config

feed = '''
<feed xmlns='http://www.w3.org/2005/Atom'>
<feed xmlns='http://www.w3.org/2005/Atom' xml:base="http://example.com/">
  <author><name>F&ouml;o</name></author>
  <entry xml:lang="en">
    <id>ignoreme</id>

@@ -15,7 +15,9 @@ feed = '''
    <title>F&ouml;o</title>
    <summary>F&ouml;o</summary>
    <content>F&ouml;o</content>
    <link href="http://example.com/entry/1/"/>
    <source>
      <link href="http://example.com/feed/"/>
      <author><name>F&ouml;o</name></author>
    </source>
  </entry>

@@ -82,3 +84,33 @@ class ScrubTest(unittest.TestCase):
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual(0, len(data.entries))

    def test_scrub_xmlbase(self):
        base = feedparser.parse(feed)
        self.assertEqual('http://example.com/',
            base.entries[0].title_detail.base)

        config.parser.readfp(StringIO.StringIO(configData))
        config.parser.set('testfeed', 'xml_base', 'feed_alternate')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual('http://example.com/feed/',
            data.entries[0].title_detail.base)

        config.parser.set('testfeed', 'xml_base', 'entry_alternate')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual('http://example.com/entry/1/',
            data.entries[0].title_detail.base)

        config.parser.set('testfeed', 'xml_base', 'base/')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual('http://example.com/base/',
            data.entries[0].title_detail.base)

        config.parser.set('testfeed', 'xml_base', 'http://example.org/data/')
        data = deepcopy(base)
        scrub('testfeed', data)
        self.assertEqual('http://example.org/data/',
            data.entries[0].title_detail.base)
@@ -35,7 +35,7 @@
      <th>Name</th>
      <th>Format</th>
      <xsl:if test="//planet:ignore_in_feed | //planet:filters |
                    //planet:*[contains(local-name(),'_type')]">
                    //planet:xml_base | //planet:*[contains(local-name(),'_type')]">
        <th>Notes</th>
      </xsl:if>
    </tr>

@@ -128,12 +128,12 @@
        </a>
      </td>
      <td><xsl:value-of select="planet:format"/></td>
      <xsl:if test="planet:ignore_in_feed | planet:filters |
      <xsl:if test="planet:ignore_in_feed | planet:filters | planet:xml_base |
                    planet:*[contains(local-name(),'_type')]">
        <td>
          <dl>
            <xsl:for-each select="planet:ignore_in_feed | planet:filters |
                                  planet:*[contains(local-name(),'_type')]">
                                  planet:xml_base | planet:*[contains(local-name(),'_type')]">
              <xsl:sort select="local-name()"/>
              <dt><xsl:value-of select="local-name()"/></dt>
              <dd><xsl:value-of select="."/></dd>