From: rlane@club.cc.cmu.edu (Rich Lane)
Subject: [sup-talk] [PATCH] index: consistent naming
Date: Tue, 16 Jun 2009 17:24:59 -0700 [thread overview]
Message-ID: <1245198299-18742-2-git-send-email-rlane@club.cc.cmu.edu> (raw)
In-Reply-To: <1245198299-18742-1-git-send-email-rlane@club.cc.cmu.edu>
Replace use of 'run_query' with 'each_docid'
Rename instances of ferret query objects to 'ferret_query'
Rename 'build_query' to 'build_ferret_query'
Rename hashes passed to index methods to 'query'
Rename 'parse_user_query_string' to 'parse_query'
Change 'parse_query' to return a query hash
Rename 'drop_entry' to 'delete' and modify callers to pass msgid
---
bin/sup-sync | 2 +-
bin/sup-tweak-labels | 4 +-
lib/sup/draft.rb | 2 +-
lib/sup/index.rb | 108 +++++++++++++++------------------
lib/sup/modes/search-results-mode.rb | 20 +++----
5 files changed, 63 insertions(+), 73 deletions(-)
diff --git a/bin/sup-sync b/bin/sup-sync
index a6e3478..a759cbe 100755
--- a/bin/sup-sync
+++ b/bin/sup-sync
@@ -218,7 +218,7 @@ begin
unless seen[m.id]
next unless m.source_info >= opts[:start_at] if opts[:start_at]
puts "Deleting #{m.id}" if opts[:verbose]
- index.drop_entry m.id unless opts[:dry_run]
+ index.delete m.id unless opts[:dry_run]
num_del += 1
end
end
diff --git a/bin/sup-tweak-labels b/bin/sup-tweak-labels
index f526a95..6f603e2 100755
--- a/bin/sup-tweak-labels
+++ b/bin/sup-tweak-labels
@@ -2,6 +2,7 @@
require 'rubygems'
require 'trollop'
+require 'enumerator'
require "sup"
class Float
@@ -81,7 +82,8 @@ begin
end
query += ' ' + opts[:query] if opts[:query]
- docs = Redwood::Index.run_query query
+ parsed_query = index.parse_query query
+ docs = Enumerable::Enumerator.new(index, :each_docid, parsed_query).map
num_total = docs.size
$stderr.puts "Found #{num_total} documents across #{source_ids.length} sources. Scanning..."
diff --git a/lib/sup/draft.rb b/lib/sup/draft.rb
index 32266b5..9127739 100644
--- a/lib/sup/draft.rb
+++ b/lib/sup/draft.rb
@@ -37,7 +37,7 @@ class DraftManager
return
end
raise ArgumentError, "not a draft: source id #{entry[:source_id].inspect}, should be #{DraftManager.source_id.inspect} for #{m.id.inspect} / docno #{docid}" unless entry[:source_id].to_i == DraftManager.source_id
- Index.drop_entry docid
+ Index.delete m.id
File.delete @source.fn_for_offset(entry[:source_info])
UpdateManager.relay self, :single_message_deleted, m
end
diff --git a/lib/sup/index.rb b/lib/sup/index.rb
index 037b941..d15e7bb 100644
--- a/lib/sup/index.rb
+++ b/lib/sup/index.rb
@@ -279,28 +279,28 @@ EOS
## you should probably not call this on a block that doesn't break
## rather quickly because the results can be very large.
EACH_BY_DATE_NUM = 100
- def each_id_by_date opts={}
+ def each_id_by_date query={}
return if empty? # otherwise ferret barfs ###TODO: remove this once my ferret patch is accepted
- query = build_query opts
+ ferret_query = build_ferret_query query
offset = 0
while true
- limit = (opts[:limit])? [EACH_BY_DATE_NUM, opts[:limit] - offset].min : EACH_BY_DATE_NUM
- results = @index_mutex.synchronize { @index.search query, :sort => "date DESC", :limit => limit, :offset => offset }
- Redwood::log "got #{results.total_hits} results for query (offset #{offset}) #{query.inspect}"
+ limit = (query[:limit])? [EACH_BY_DATE_NUM, query[:limit] - offset].min : EACH_BY_DATE_NUM
+ results = @index_mutex.synchronize { @index.search ferret_query, :sort => "date DESC", :limit => limit, :offset => offset }
+ Redwood::log "got #{results.total_hits} results for query (offset #{offset}) #{ferret_query.inspect}"
results.hits.each do |hit|
yield @index_mutex.synchronize { @index[hit.doc][:message_id] }, lambda { build_message hit.doc }
end
- break if opts[:limit] and offset >= opts[:limit] - limit
+ break if query[:limit] and offset >= query[:limit] - limit
break if offset >= results.total_hits - limit
offset += limit
end
end
- def num_results_for opts={}
+ def num_results_for query={}
return 0 if empty? # otherwise ferret barfs ###TODO: remove this once my ferret patch is accepted
- q = build_query opts
- @index_mutex.synchronize { @index.search(q, :limit => 1).total_hits }
+ ferret_query = build_ferret_query query
+ @index_mutex.synchronize { @index.search(ferret_query, :limit => 1).total_hits }
end
## yield all messages in the thread containing 'm' by repeatedly
@@ -313,7 +313,7 @@ EOS
## is found.
SAME_SUBJECT_DATE_LIMIT = 7
MAX_CLAUSES = 1000
- def each_message_in_thread_for m, opts={}
+ def each_message_in_thread_for m, query={}
#Redwood::log "Building thread for #{m.id}: #{m.subj}"
messages = {}
searched = {}
@@ -332,7 +332,7 @@ EOS
q.add_query sq, :must
q.add_query Ferret::Search::RangeQuery.new(:date, :>= => date_min.to_indexable_s, :<= => date_max.to_indexable_s), :must
- q = build_query :qobj => q
+ q = build_ferret_query :qobj => q
p1 = @index_mutex.synchronize { @index.search(q).hits.map { |hit| @index[hit.doc][:message_id] } }
Redwood::log "found #{p1.size} results for subject query #{q}"
@@ -343,7 +343,7 @@ EOS
pending = (pending + p1 + p2).uniq
end
- until pending.empty? || (opts[:limit] && messages.size >= opts[:limit])
+ until pending.empty? || (query[:limit] && messages.size >= query[:limit])
q = Ferret::Search::BooleanQuery.new true
# this disappeared in newer ferrets... wtf.
# q.max_clause_count = 2048
@@ -356,14 +356,14 @@ EOS
end
pending = pending[lim .. -1]
- q = build_query :qobj => q
+ q = build_ferret_query :qobj => q
num_queries += 1
killed = false
@index_mutex.synchronize do
@index.search_each(q, :limit => :all) do |docid, score|
- break if opts[:limit] && messages.size >= opts[:limit]
- if @index[docid][:label].split(/\s+/).include?("killed") && opts[:skip_killed]
+ break if query[:limit] && messages.size >= query[:limit]
+ if @index[docid][:label].split(/\s+/).include?("killed") && query[:skip_killed]
killed = true
break
end
@@ -419,7 +419,7 @@ EOS
def wrap_subj subj; "__START_SUBJECT__ #{subj} __END_SUBJECT__"; end
def unwrap_subj subj; subj =~ /__START_SUBJECT__ (.*?) __END_SUBJECT__/ && $1; end
- def drop_entry docno; @index_mutex.synchronize { @index.delete docno } end
+ def delete id; @index_mutex.synchronize { @index.delete id } end
def load_entry_for_id mid
@index_mutex.synchronize do
@@ -478,27 +478,14 @@ EOS
@index_mutex.synchronize { @index.search(q, :limit => 1).total_hits > 0 }
end
- ## takes a user query string and returns the list of docids for messages
- ## that match the query.
- ##
- ## messages can then be loaded from the index with #build_message.
- ##
- ## raises a ParseError if the parsing failed.
- def run_query query
- qobj, opts = Redwood::Index.parse_user_query_string query
- query = Redwood::Index.build_query opts.merge(:qobj => qobj)
- results = @index.search query, :limit => (opts[:limit] || :all)
- results.hits.map { |hit| hit.doc }
- end
-
- def each_docid opts={}
- query = build_query opts
- results = @index_mutex.synchronize { @index.search query, :limit => (opts[:limit] || :all) }
+ def each_docid query={}
+ ferret_query = build_ferret_query query
+ results = @index_mutex.synchronize { @index.search ferret_query, :limit => (query[:limit] || :all) }
results.hits.map { |hit| yield hit.doc }
end
- def each_message opts={}
- each_docid opts do |docid|
+ def each_message query={}
+ each_docid query do |docid|
yield build_message(docid)
end
end
@@ -507,16 +494,15 @@ EOS
@index_mutex.synchronize { @index.optimize }
end
-protected
-
class ParseError < StandardError; end
- ## parse a query string from the user. returns a query object and a set of
- ## extra flags; both of these are meant to be passed to #build_query.
+ ## parse a query string from the user. returns a query hash
+ ## that can be passed to any index method with a 'query'
+ ## argument, as well as build_ferret_query.
##
## raises a ParseError if something went wrong.
- def parse_user_query_string s
- extraopts = {}
+ def parse_query s
+ query = {}
subs = s.gsub(/\b(to|from):(\S+)\b/) do
field, name = $1, $2
@@ -542,8 +528,8 @@ protected
## final stage of query processing. if the user wants to search spam
## messages, not adding that is the right thing; if he doesn't want to
## search spam messages, then not adding it won't have any effect.
- extraopts[:load_spam] = true if subs =~ /\blabel:spam\b/
- extraopts[:load_deleted] = true if subs =~ /\blabel:deleted\b/
+ query[:load_spam] = true if subs =~ /\blabel:spam\b/
+ query[:load_deleted] = true if subs =~ /\blabel:deleted\b/
## gmail style "is" operator
subs = subs.gsub(/\b(is|has):(\S+)\b/) do
@@ -552,10 +538,10 @@ protected
when "read"
"-label:unread"
when "spam"
- extraopts[:load_spam] = true
+ query[:load_spam] = true
"label:spam"
when "deleted"
- extraopts[:load_deleted] = true
+ query[:load_deleted] = true
"label:deleted"
else
"label:#{$2}"
@@ -601,7 +587,7 @@ protected
subs = subs.gsub(/\blimit:(\S+)\b/) do
lim = $1
if lim =~ /^\d+$/
- extraopts[:limit] = lim.to_i
+ query[:limit] = lim.to_i
''
else
raise ParseError, "non-numeric limit #{lim.inspect}"
@@ -609,32 +595,36 @@ protected
end
begin
- [@qparser.parse(subs), extraopts]
+ query[:qobj] = @qparser.parse(subs)
+ query[:text] = s
+ query
rescue Ferret::QueryParser::QueryParseException => e
raise ParseError, e.message
end
end
- def build_query opts
- query = Ferret::Search::BooleanQuery.new
- query.add_query opts[:qobj], :must if opts[:qobj]
- labels = ([opts[:label]] + (opts[:labels] || [])).compact
- labels.each { |t| query.add_query Ferret::Search::TermQuery.new("label", t.to_s), :must }
- if opts[:participants]
+private
+
+ def build_ferret_query query
+ q = Ferret::Search::BooleanQuery.new
+ q.add_query query[:qobj], :must if query[:qobj]
+ labels = ([query[:label]] + (query[:labels] || [])).compact
+ labels.each { |t| q.add_query Ferret::Search::TermQuery.new("label", t.to_s), :must }
+ if query[:participants]
q2 = Ferret::Search::BooleanQuery.new
- opts[:participants].each do |p|
+ query[:participants].each do |p|
q2.add_query Ferret::Search::TermQuery.new("from", p.email), :should
q2.add_query Ferret::Search::TermQuery.new("to", p.email), :should
end
- query.add_query q2, :must
+ q.add_query q2, :must
end
- query.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless opts[:load_spam] || labels.include?(:spam)
- query.add_query Ferret::Search::TermQuery.new("label", "deleted"), :must_not unless opts[:load_deleted] || labels.include?(:deleted)
- query.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not if opts[:skip_killed]
+ q.add_query Ferret::Search::TermQuery.new("label", "spam"), :must_not unless query[:load_spam] || labels.include?(:spam)
+ q.add_query Ferret::Search::TermQuery.new("label", "deleted"), :must_not unless query[:load_deleted] || labels.include?(:deleted)
+ q.add_query Ferret::Search::TermQuery.new("label", "killed"), :must_not if query[:skip_killed]
- query.add_query Ferret::Search::TermQuery.new("source_id", opts[:source_id]), :must if opts[:source_id]
- query
+ q.add_query Ferret::Search::TermQuery.new("source_id", query[:source_id]), :must if query[:source_id]
+ q
end
def save_sources fn=Redwood::SOURCE_FN
diff --git a/lib/sup/modes/search-results-mode.rb b/lib/sup/modes/search-results-mode.rb
index 227ee9b..121e817 100644
--- a/lib/sup/modes/search-results-mode.rb
+++ b/lib/sup/modes/search-results-mode.rb
@@ -1,11 +1,9 @@
module Redwood
class SearchResultsMode < ThreadIndexMode
- def initialize qobj, qopts = nil
- @qobj = qobj
- @qopts = qopts
-
- super [], { :qobj => @qobj }.merge(@qopts)
+ def initialize query
+ @query = query
+ super [], query
end
register_keymap do |k|
@@ -13,9 +11,9 @@ class SearchResultsMode < ThreadIndexMode
end
def refine_search
- query = BufferManager.ask :search, "refine query: ", (@qobj.to_s + " ")
- return unless query && query !~ /^\s*$/
- SearchResultsMode.spawn_from_query query
+ text = BufferManager.ask :search, "refine query: ", (@query[:text] + " ")
+ return unless text && text !~ /^\s*$/
+ SearchResultsMode.spawn_from_query text
end
## a proper is_relevant? method requires some way of asking ferret
@@ -26,10 +24,10 @@ class SearchResultsMode < ThreadIndexMode
def self.spawn_from_query text
begin
- qobj, extraopts = Index.parse_user_query_string(text)
- return unless qobj
+ query = Index.parse_query(text)
+ return unless query
short_text = text.length < 20 ? text : text[0 ... 20] + "..."
- mode = SearchResultsMode.new qobj, extraopts
+ mode = SearchResultsMode.new query
BufferManager.spawn "search: \"#{short_text}\"", mode
mode.load_threads :num => mode.buffer.content_height
rescue Index::ParseError => e
--
1.6.0.4
prev parent reply other threads:[~2009-06-17 0:24 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-17 0:24 [sup-talk] [PATCH] index: cleanup interface Rich Lane
2009-06-17 0:24 ` Rich Lane [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1245198299-18742-2-git-send-email-rlane@club.cc.cmu.edu \
--to=rlane@club.cc.cmu.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.