sup

A curses threads-with-tags style email client

sup.git

git clone https://supmua.dev/git/sup/
commit 63f87a5ff3a996352cd733b6ac2e432c9e86ab97
parent b6097e518d47e94adb5231451cba6ccbecb7cf00
Author: William Morgan <wmorgan-sup@masanjin.net>
Date:   Wed, 24 Jun 2009 13:30:43 -0400

bugfix: dates need to be truncated for xapian to index

If dates are way out of range, the current indexing process both dies
and generates bad doc ids. This patch forces dates to be within a
reasonable range (currently between 1969 and 2038). Not necessarily the
best solution.

Diffstat:
M lib/sup/xapian_index.rb | 28 ++++++++++++++++++++++++----
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/lib/sup/xapian_index.rb b/lib/sup/xapian_index.rb
@@ -10,6 +10,12 @@ module Redwood
 class XapianIndex < BaseIndex
   STEM_LANGUAGE = "english"
 
+  ## dates are converted to integers for xapian, and are used for document ids,
+  ## so we must ensure they're reasonably valid. this typically only affects
+  ## spam.
+  MIN_DATE = Time.at 0
+  MAX_DATE = Time.at(2**31)
+
   def initialize dir=BASE_DIR
     super
 
@@ -307,8 +313,8 @@ class XapianIndex < BaseIndex
   DOCID_SCALE = 2.0**32
   TIME_SCALE = 2.0**27
   MIDDLE_DATE = Time.gm(2011)
-  def assign_docid m
-    t = (m.date.to_i - MIDDLE_DATE.to_i).to_f
+  def assign_docid m, truncated_date
+    t = (truncated_date.to_i - MIDDLE_DATE.to_i).to_f
     docid = (DOCID_SCALE - DOCID_SCALE/(Math::E**(-(t/TIME_SCALE)) + 1)).to_i
     begin
       while @assigned_docids.member? [docid].pack("N")
@@ -400,11 +406,25 @@ class XapianIndex < BaseIndex
     text << [body_text, PREFIX['body']]
     m.attachments.each { |a| text << [a, PREFIX['attachment']] }
 
+    truncated_date = if m.date < MIN_DATE
+      Redwood::log "warning: adjusting too-low date #{m.date} for indexing"
+      MIN_DATE
+    elsif m.date > MAX_DATE
+      Redwood::log "warning: adjusting too-high date #{m.date} for indexing"
+      MAX_DATE
+    else
+      m.date
+    end
+
     # Date value for range queries
-    date_value = Xapian.sortable_serialise(m.date.to_i)
+    date_value = begin
+      Xapian.sortable_serialise truncated_date.to_i
+    rescue TypeError
+      Xapian.sortable_serialise 0
+    end
 
     doc = Xapian::Document.new
-    docid = @docids[m.id] || assign_docid(m)
+    docid = @docids[m.id] || assign_docid(m, truncated_date)
 
     @term_generator.document = doc
     text.each { |text,prefix| @term_generator.index_text text, 1, prefix }