commit 5ae3730ff0e6939428c73d4c585e8bee3ca9bbd8
parent 4d7420876c89a6318084507060d9cea3095dd7b9
Author: wmorgan <wmorgan@5c8cc53c-5e98-4d25-b20a-d8db53a31250>
Date: Thu, 28 Dec 2006 18:20:48 +0000
mbox::loader#total went missing for some reason. restored it.
re-fixed tokenization in index.rb to do downcasing. i think that's probably
the right idea for email search.
git-svn-id: svn://rubyforge.org/var/svn/sup/trunk@105 5c8cc53c-5e98-4d25-b20a-d8db53a31250
Diffstat:
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/lib/sup/index.rb b/lib/sup/index.rb
@@ -2,8 +2,8 @@
require 'thread'
require 'fileutils'
-#require 'ferret'
-require_gem 'ferret', ">= 0.10.13"
+require 'ferret'
+#require_gem 'ferret', ">= 0.10.13"
module Redwood
@@ -26,7 +26,7 @@ class Index
@sources = {}
@sources_dirty = false
- wsa = Ferret::Analysis::WhiteSpaceAnalyzer.new true
+ wsa = Ferret::Analysis::WhiteSpaceAnalyzer.new false
sa = Ferret::Analysis::StandardAnalyzer.new Ferret::Analysis::FULL_ENGLISH_STOP_WORDS, true
@analyzer = Ferret::Analysis::PerFieldAnalyzer.new wsa
@analyzer[:body] = sa
@@ -90,7 +90,7 @@ class Index
end
## this happens sometimes. i'm not sure why. ferret bug?
- raise "no entry and no source info for message #{m.id}" unless source && source_info
+ raise "no entry and no source info for message #{m.id}: source #{source.inspect}, info #{source_info.inspect}, entry #{entry.inspect}, query #{Ferret::Search::TermQuery.new(:message_id, m.id)}, results #{@index.search(Ferret::Search::TermQuery.new(:message_id, m.id)).inspect}" unless source && source_info
raise "deleting non-corresponding entry #{docid}" unless @index[docid][:message_id] == m.id
@index.delete docid
diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb
@@ -29,6 +29,7 @@ class Loader < Source
def start_offset; 0; end
def end_offset; File.size @f; end
+ def total; end_offset; end
def load_header offset
header = nil