commit 0b5f06e2c34d7aa986bfdcecc59d9fdf6650a88a
parent ca10ef47501b52a37e28ad3a3ecf10b6a00dea35
Author: Rich Lane <rlane@club.cc.cmu.edu>
Date: Wed, 24 Mar 2010 21:19:07 -0700
use xapian magic to optimize mbox first_new_message
Diffstat:
2 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/lib/sup/index.rb b/lib/sup/index.rb
@@ -265,6 +265,17 @@ EOS
synchronize { get_entry(id)[:source_id] }
end
+ ## Yields each term in the index that starts with prefix
+ def each_prefixed_term prefix
+ term = @xapian._dangerous_allterms_begin prefix
+ lastTerm = @xapian._dangerous_allterms_end prefix
+ until term.equals lastTerm
+ yield term.term
+ term.next
+ end
+ nil
+ end
+
class ParseError < StandardError; end
## parse a query string from the user. returns a query object
diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb
@@ -134,20 +134,21 @@ class MBox < Source
@mutex.synchronize do
@f.seek offset
nil while line = @f.gets and not MBox::is_break_line? line
- @f.tell
+ offset = @f.tell
+ offset != File.size(@f) ? offset : nil
end
end
- ## TODO binary search
+ ## TODO optimize this by iterating over allterms list backwards or
+ ## storing source_info negated
+ def last_indexed_message
+ prefix = "J#{[@id].pack('n')}" # XXX index internal
+ Enumerator.new(Index, :each_prefixed_term, prefix).map { |x| x[prefix.length..-1].to_i }.max
+ end
+
+ ## offset of first new message or nil
def first_new_message
- offset = 0
- end_offset = File.size @f
- while offset < end_offset
- offset = next_offset offset
- new = Index.num_results_for(:location => [@id, offset]) == 0
- return offset if new
- end
- return nil
+ next_offset(last_indexed_message || 0)
end
def self.is_break_line? l