commit 38ae11711eecfc2470f80d99a210fefedf92e987
parent fa5a06c2904ff4c9cb266d4a9df2b35efa0433e5
Author: Rich Lane <rlane@club.cc.cmu.edu>
Date: Sat, 1 Aug 2009 15:56:09 -0700
xapian: drop excessively long terms
Diffstat:
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/lib/sup/xapian_index.rb b/lib/sup/xapian_index.rb
@@ -304,6 +304,8 @@ class XapianIndex < BaseIndex
DATE_VALUENO = 0
+ MAX_TERM_LENGTH = 245
+
# Xapian can very efficiently sort in ascending docid order. Sup always wants
# to sort by descending date, so this method maps between them. In order to
# handle multiple messages per second, we use a logistic curve centered
@@ -428,7 +430,7 @@ class XapianIndex < BaseIndex
@term_generator.document = doc
text.each { |text,prefix| @term_generator.index_text text, 1, prefix }
- terms.each { |term| doc.add_term term }
+ terms.each { |term| doc.add_term term if term.length <= MAX_TERM_LENGTH }
doc.add_value DATE_VALUENO, date_value
doc.data = m.id