Remove more cruft from filenames generated from ids; make such file names
compatible with Win32 file systems; no longer treat PrettyPrinting failures
as fatal.
This commit is contained in:
Sam Ruby 2006-08-17 08:05:10 -04:00
parent 8ae02eaa2b
commit 6c0e24fd00
2 changed files with 16 additions and 8 deletions

View File

@ -14,8 +14,8 @@ except:
PrettyPrint = None PrettyPrint = None
# Regular expressions to sanitise cache filenames # Regular expressions to sanitise cache filenames
re_url_scheme = re.compile(r'^[^:]*://') re_url_scheme = re.compile(r'^\w+:/*(\w+:|www\.)?')
re_slash = re.compile(r'[?/]+') re_slash = re.compile(r'[?/:]+')
re_initial_cruft = re.compile(r'^[,.]*') re_initial_cruft = re.compile(r'^[,.]*')
re_final_cruft = re.compile(r'[,.]*$') re_final_cruft = re.compile(r'[,.]*$')
@ -68,9 +68,13 @@ def spiderFeed(feed):
xml = reconstitute.reconstitute(data, entry) xml = reconstitute.reconstitute(data, entry)
file = open(out,'w') file = open(out,'w')
if PrettyPrint: try:
PrettyPrint(reconstitute.reconstitute(data, entry), file) PrettyPrint(reconstitute.reconstitute(data, entry), file)
else: except:
# known reasons for failure include no pretty printer installed,
# and absurdly high levels of markup nesting causing Python to
# declare infinite recursion.
file.seek(0)
file.write(reconstitute.reconstitute(data, entry).toxml('utf-8')) file.write(reconstitute.reconstitute(data, entry).toxml('utf-8'))
file.close() file.close()

View File

@ -24,8 +24,12 @@ class SpiderTest(unittest.TestCase):
def test_filename(self): def test_filename(self):
self.assertEqual('./example.com,index.html', self.assertEqual('./example.com,index.html',
filename('.', 'http://example.com/index.html')) filename('.', 'http://example.com/index.html'))
self.assertEqual('./www.xn--8ws00zhy3a.com', self.assertEqual('./xn--8ws00zhy3a.com',
filename('.', u'http://www.\u8a79\u59c6\u65af.com/')) filename('.', u'http://www.\u8a79\u59c6\u65af.com/'))
self.assertEqual('./planet.intertwingly.net,2006,testfeed1,1',
filename('.', u'tag:planet.intertwingly.net,2006:testfeed1,1'))
self.assertEqual('./00000000-0000-0000-0000-000000000000',
filename('.', u'urn:uuid:00000000-0000-0000-0000-000000000000'))
def test_spiderFeed(self): def test_spiderFeed(self):
config.load(configfile) config.load(configfile)
@ -37,7 +41,7 @@ class SpiderTest(unittest.TestCase):
# verify that the file names are as expected # verify that the file names are as expected
self.assertTrue(workdir + self.assertTrue(workdir +
'/tag:planet.intertwingly.net,2006:testfeed1,1' in files) '/planet.intertwingly.net,2006,testfeed1,1' in files)
# verify that the file timestamps match atom:updated # verify that the file timestamps match atom:updated
for file in files: for file in files:
@ -59,7 +63,7 @@ class SpiderTest(unittest.TestCase):
# verify that the file names are as expected # verify that the file names are as expected
self.assertTrue(workdir + self.assertTrue(workdir +
'/tag:planet.intertwingly.net,2006:testfeed1,1' in files) '/planet.intertwingly.net,2006,testfeed1,1' in files)
self.assertTrue(workdir + self.assertTrue(workdir +
'/tag:planet.intertwingly.net,2006:testfeed2,1' in files) '/planet.intertwingly.net,2006,testfeed2,1' in files)