#!/usr/bin/env python
"""Unit test for ``planet.spider.scrub`` driven by a per-feed config section.

A small Atom document is parsed, a ``[testfeed]`` configuration section is
loaded, and the test checks the parsed data before and after calling
``scrub('testfeed', data)``.
"""

import unittest
import StringIO

from planet.spider import scrub
from planet import feedparser, config

# Atom fixture: one feed-level author and one entry carrying an id, author,
# updated timestamp, title, summary, content and a source author -- exactly
# the attributes the assertions below read.
#
# NOTE(review): the element markup and the ``&amp;ouml;`` escaping were
# reconstructed from the text tokens and from the assertions in test_scrub;
# confirm against the upstream fixture before relying on the exact bytes.
feed = '''
<feed xmlns='http://www.w3.org/2005/Atom'>
  <author><name>F&amp;ouml;o</name></author>
  <entry>
    <id>ignoreme</id>
    <author><name>F&amp;ouml;o</name></author>
    <updated>2000-01-01T00:00:00Z</updated>
    <title>F&amp;ouml;o</title>
    <summary>F&amp;ouml;o</summary>
    <content>F&amp;ouml;o</content>
    <source>
      <author><name>F&amp;ouml;o</name></author>
    </source>
  </entry>
</feed>
'''

# INI-style settings for the feed named 'testfeed'.  Each option must sit on
# its own line; on a single line everything after the first '=' would be read
# as the value of ``ignore_in_feed``.
configData = '''
[testfeed]
ignore_in_feed = id updated
name_type = html
title_type = html
summary_type = html
content_type = html
'''


class ScrubTest(unittest.TestCase):
    """Checks the effect of ``scrub`` on a parsed feed for one config section."""

    def test_scrub(self):
        # Parse the fixture and load the [testfeed] section into the shared
        # planet configuration parser.
        data = feedparser.parse(feed)
        config.parser.readfp(StringIO.StringIO(configData))

        # Before scrubbing: the author name still holds the literal entity
        # text and the entry still exposes its id and timestamp keys.
        # NOTE(review): 'F&ouml;o' is the reconstructed pre-scrub value (see
        # the note on ``feed``); the collapsed original showed it decoded.
        self.assertEqual('F&ouml;o', data.feed.author_detail.name)
        self.assertTrue(data.entries[0].has_key('id'))
        self.assertTrue(data.entries[0].has_key('updated'))
        self.assertTrue(data.entries[0].has_key('updated_parsed'))

        scrub('testfeed', data)

        # ignore_in_feed = id updated: both keys, plus the derived
        # 'updated_parsed', are expected to be gone from the entry.
        self.assertFalse(data.entries[0].has_key('id'))
        self.assertFalse(data.entries[0].has_key('updated'))
        self.assertFalse(data.entries[0].has_key('updated_parsed'))

        # name_type = html: every author name (feed, entry, entry source) is
        # expected as the UTF-8 byte sequence for o-umlaut.
        self.assertEqual('F\xc3\xb6o', data.feed.author_detail.name)
        self.assertEqual('F\xc3\xb6o', data.entries[0].author_detail.name)
        self.assertEqual('F\xc3\xb6o', data.entries[0].source.author_detail.name)

        # title_type / summary_type / content_type = html: the detail types
        # are expected to read 'text/html' after scrubbing.
        self.assertEqual('text/html', data.entries[0].title_detail.type)
        self.assertEqual('text/html', data.entries[0].summary_detail.type)
        self.assertEqual('text/html', data.entries[0].content[0].type)