From 632fe3e7cc7149ae79ea4d0304728daa2240938f Mon Sep 17 00:00:00 2001 From: Sam Ruby Date: Thu, 14 Oct 2010 04:58:09 +0800 Subject: [PATCH 1/5] Ignore anchor when fetching the page for the title --- filters/mememe.plugin | 1 + 1 file changed, 1 insertion(+) diff --git a/filters/mememe.plugin b/filters/mememe.plugin index e2cf038..30a1f2a 100644 --- a/filters/mememe.plugin +++ b/filters/mememe.plugin @@ -214,6 +214,7 @@ class html(sgmllib.SGMLParser): self.feedurl = "" self.intitle = False + url = url.split('#')[0] headers = check_cache(url) try: From 5cefb0fdebbe74ced16449818bad69433b55c378 Mon Sep 17 00:00:00 2001 From: Scott Johnson Date: Sat, 16 Oct 2010 13:11:10 +0800 Subject: [PATCH 2/5] remove ul when no memes are found --- filters/mememe.plugin | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/filters/mememe.plugin b/filters/mememe.plugin index 30a1f2a..8073a27 100644 --- a/filters/mememe.plugin +++ b/filters/mememe.plugin @@ -496,6 +496,10 @@ for i in range(0,len(weighted_links)): count = count + 1 if count >= 10: break +# remove ul when there are no memes +if memes_ul.lsCountNode() < 1: + memes_ul.unlinkNode() + log.info("Writing " + MEMES_ATOM) output=open(MEMES_ATOM,'w') output.write(feed_doc.serialize('utf-8')) From 62d3e68f793537a982cc850d87b407864622441e Mon Sep 17 00:00:00 2001 From: Sam Ruby Date: Tue, 26 Oct 2010 02:57:35 +0800 Subject: [PATCH 3/5] Fix for problem found by Seth Vidal: http://lists.planetplanet.org/archives/devel/2010-August/002156.html --- planet/spider.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/planet/spider.py b/planet/spider.py index 034b312..311077f 100644 --- a/planet/spider.py +++ b/planet/spider.py @@ -428,8 +428,6 @@ def spiderPlanet(only_if_new = False): # Process the results as they arrive feeds_seen = {} while fetch_queue.qsize() or parse_queue.qsize() or threads: - while parse_queue.qsize() == 0 and threads: - time.sleep(0.1) while parse_queue.qsize(): (uri, feed_info, feed) = 
parse_queue.get(False) try: @@ -487,6 +485,8 @@ def spiderPlanet(only_if_new = False): traceback.format_tb(tb)): log.error(line.rstrip()) + time.sleep(0.1) + for index in threads.keys(): if not threads[index].isAlive(): del threads[index] From 635174e3d2b5f1721e5a3b2471097f507d3042a4 Mon Sep 17 00:00:00 2001 From: Sam Ruby Date: Tue, 26 Oct 2010 03:00:33 +0800 Subject: [PATCH 4/5] =?UTF-8?q?Fix=20problem=20found=20by=20Alex=20Schr?= =?UTF-8?q?=C3=B6der:=20http://lists.planetplanet.org/archives/devel/2010-?= =?UTF-8?q?October/002164.html?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- planet/shell/xslt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planet/shell/xslt.py b/planet/shell/xslt.py index 0b6579f..24173ea 100644 --- a/planet/shell/xslt.py +++ b/planet/shell/xslt.py @@ -8,7 +8,7 @@ def quote(string, apos): if string.find("'")<0: return "'" + string + "'" - elif string.find("'")<0: + elif string.find('"')<0: return '"' + string + '"' else: # unclear how to quote strings with both types of quotes for libxslt From c2ff9f22dbf2089f1d40876c276b9cb965fc47ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Morten=20H=C3=B8ybye=20Frederiksen?= Date: Sat, 30 Oct 2010 20:28:35 +0200 Subject: [PATCH 5/5] Reconstitute dc:date.Taken as dc:date_Taken --- planet/reconstitute.py | 5 +++++ tests/data/reconstitute/dc_date_taken.xml | 12 ++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/data/reconstitute/dc_date_taken.xml diff --git a/planet/reconstitute.py b/planet/reconstitute.py index 8d0979c..ffba7a0 100644 --- a/planet/reconstitute.py +++ b/planet/reconstitute.py @@ -279,6 +279,11 @@ def reconstitute(feed, entry): date(xentry, 'updated', entry_updated(feed.feed, entry, time.gmtime())) date(xentry, 'published', entry.get('published_parsed',None)) + if entry.has_key('dc_date.taken'): + date_Taken = createTextElement(xentry, '%s:%s' % ('dc','date_Taken'), '%s' % entry.get('dc_date.taken', 
None)) + date_Taken.setAttribute('xmlns:%s' % 'dc', 'http://purl.org/dc/elements/1.1/') + xentry.appendChild(date_Taken) + for tag in entry.get('tags',[]): category(xentry, tag) diff --git a/tests/data/reconstitute/dc_date_taken.xml b/tests/data/reconstitute/dc_date_taken.xml new file mode 100644 index 0000000..3d83960 --- /dev/null +++ b/tests/data/reconstitute/dc_date_taken.xml @@ -0,0 +1,12 @@ + + + + + 2010-10-15T16:10:05-01:00 + + +