Archive of RubyForge sup-devel mailing list
 help / color / mirror / Atom feed
* [sup-devel] [PATCH] fix handling of multiple label: terms in search
@ 2010-09-29 14:16 Sascha Silbe
  2010-09-29 15:27 ` Edward Z. Yang
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Sascha Silbe @ 2010-09-29 14:16 UTC (permalink / raw)
  To: sup-devel

By default, Xapian combines boolean query terms that share a prefix using OR
instead of AND, so searching for multiple labels (e.g. "label:a label:b")
doesn't return the expected results. By making use of the "exclusive"
parameter to add_boolean_prefix (added in Xapian 1.2), we can tell Xapian to
use OR only for prefixes whose terms are guaranteed to be unique (at most one
such term per document), and AND for all other prefixes.

Signed-off-by: Sascha Silbe <sascha-pgp@silbe.org>
---
 lib/sup/index.rb |   74 +++++++++++++++++++++++++++---------------------------
 1 files changed, 37 insertions(+), 37 deletions(-)

Tested on Debian Squeeze with Ruby 1.8.7.302 and Xapian 1.2.3.

diff --git a/lib/sup/index.rb b/lib/sup/index.rb
index 9273f18..a72bec6 100644
--- a/lib/sup/index.rb
+++ b/lib/sup/index.rb
@@ -419,8 +419,8 @@ EOS
     qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
     qp.default_op = Xapian::Query::OP_AND
     qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
-    NORMAL_PREFIX.each { |k,vs| vs.each { |v| qp.add_prefix k, v } }
-    BOOLEAN_PREFIX.each { |k,vs| vs.each { |v| qp.add_boolean_prefix k, v } }
+    NORMAL_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_prefix k, v } }
+    BOOLEAN_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_boolean_prefix k, v, info[:exclusive] } }

     begin
       xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD)
@@ -471,31 +471,31 @@ EOS

   # Stemmed
   NORMAL_PREFIX = {
-    'subject' => 'S',
-    'body' => 'B',
-    'from_name' => 'FN',
-    'to_name' => 'TN',
-    'name' => %w(FN TN),
-    'attachment' => 'A',
-    'email_text' => 'E',
-    '' => %w(S B FN TN A E),
+    'subject' => {:prefix => 'S', :exclusive => false},
+    'body' => {:prefix => 'B', :exclusive => false},
+    'from_name' => {:prefix => 'FN', :exclusive => false},
+    'to_name' => {:prefix => 'TN', :exclusive => false},
+    'name' => {:prefix => %w(FN TN), :exclusive => false},
+    'attachment' => {:prefix => 'A', :exclusive => false},
+    'email_text' => {:prefix => 'E', :exclusive => false},
+    '' => {:prefix => %w(S B FN TN A E), :exclusive => false},
   }

   # Unstemmed
   BOOLEAN_PREFIX = {
-    'type' => 'K',
-    'from_email' => 'FE',
-    'to_email' => 'TE',
-    'email' => %w(FE TE),
-    'date' => 'D',
-    'label' => 'L',
-    'source_id' => 'I',
-    'attachment_extension' => 'O',
-    'msgid' => 'Q',
-    'id' => 'Q',
-    'thread' => 'H',
-    'ref' => 'R',
-    'location' => 'J',
+    'type' => {:prefix => 'K', :exclusive => true},
+    'from_email' => {:prefix => 'FE', :exclusive => false},
+    'to_email' => {:prefix => 'TE', :exclusive => false},
+    'email' => {:prefix => %w(FE TE), :exclusive => false},
+    'date' => {:prefix => 'D', :exclusive => true},
+    'label' => {:prefix => 'L', :exclusive => false},
+    'source_id' => {:prefix => 'I', :exclusive => true},
+    'attachment_extension' => {:prefix => 'O', :exclusive => false},
+    'msgid' => {:prefix => 'Q', :exclusive => true},
+    'id' => {:prefix => 'Q', :exclusive => true},
+    'thread' => {:prefix => 'H', :exclusive => false},
+    'ref' => {:prefix => 'R', :exclusive => false},
+    'location' => {:prefix => 'J', :exclusive => false},
   }

   PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
@@ -661,8 +661,8 @@ EOS
     # Person names are indexed with several prefixes
     person_termer = lambda do |d|
       lambda do |p|
-        doc.index_text p.name, PREFIX["#{d}_name"] if p.name
-        doc.index_text p.email, PREFIX['email_text']
+        doc.index_text p.name, PREFIX["#{d}_name"][:prefix] if p.name
+        doc.index_text p.email, PREFIX['email_text'][:prefix]
         doc.add_term mkterm(:email, d, p.email)
       end
     end
@@ -673,9 +673,9 @@ EOS
     # Full text search content
     subject_text = m.indexable_subject
     body_text = m.indexable_body
-    doc.index_text subject_text, PREFIX['subject']
-    doc.index_text body_text, PREFIX['body']
-    m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
+    doc.index_text subject_text, PREFIX['subject'][:prefix]
+    doc.index_text body_text, PREFIX['body'][:prefix]
+    m.attachments.each { |a| doc.index_text a, PREFIX['attachment'][:prefix] }

     # Miscellaneous terms
     doc.add_term mkterm(:date, m.date) if m.date
@@ -753,25 +753,25 @@ EOS
   def mkterm type, *args
     case type
     when :label
-      PREFIX['label'] + args[0].to_s.downcase
+      PREFIX['label'][:prefix] + args[0].to_s.downcase
     when :type
-      PREFIX['type'] + args[0].to_s.downcase
+      PREFIX['type'][:prefix] + args[0].to_s.downcase
     when :date
-      PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
+      PREFIX['date'][:prefix] + args[0].getutc.strftime("%Y%m%d%H%M%S")
     when :email
       case args[0]
-      when :from then PREFIX['from_email']
-      when :to then PREFIX['to_email']
+      when :from then PREFIX['from_email'][:prefix]
+      when :to then PREFIX['to_email'][:prefix]
       else raise "Invalid email term type #{args[0]}"
       end + args[1].to_s.downcase
     when :source_id
-      PREFIX['source_id'] + args[0].to_s.downcase
+      PREFIX['source_id'][:prefix] + args[0].to_s.downcase
     when :location
-      PREFIX['location'] + [args[0]].pack('n') + args[1].to_s
+      PREFIX['location'][:prefix] + [args[0]].pack('n') + args[1].to_s
     when :attachment_extension
-      PREFIX['attachment_extension'] + args[0].to_s.downcase
+      PREFIX['attachment_extension'][:prefix] + args[0].to_s.downcase
     when :msgid, :ref, :thread
-      PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
+      PREFIX[type.to_s][:prefix] + args[0][0...(MAX_TERM_LENGTH-1)]
     else
       raise "Invalid term type #{type}"
     end
--
1.7.1

_______________________________________________
Sup-devel mailing list
Sup-devel@rubyforge.org
http://rubyforge.org/mailman/listinfo/sup-devel


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2011-01-18 19:21 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-29 14:16 [sup-devel] [PATCH] fix handling of multiple label: terms in search Sascha Silbe
2010-09-29 15:27 ` Edward Z. Yang
2010-10-08  4:30 ` Rich Lane
2011-01-17  6:20 ` Rich Lane
2011-01-18 18:11   ` Sascha Silbe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox