From mboxrd@z Thu Jan 1 00:00:00 1970 Received: by 10.204.20.129 with SMTP id f1cs81051bkb; Wed, 29 Sep 2010 07:26:25 -0700 (PDT) Received: by 10.229.11.32 with SMTP id r32mr1167266qcr.242.1285770381316; Wed, 29 Sep 2010 07:26:21 -0700 (PDT) Return-Path: Received: from rubyforge.org (rubyforge.org [205.234.109.19]) by mx.google.com with ESMTP id y11si16725585qco.137.2010.09.29.07.26.20; Wed, 29 Sep 2010 07:26:21 -0700 (PDT) Received-SPF: pass (google.com: domain of sup-devel-bounces@rubyforge.org designates 205.234.109.19 as permitted sender) client-ip=205.234.109.19; Authentication-Results: mx.google.com; spf=pass (google.com: domain of sup-devel-bounces@rubyforge.org designates 205.234.109.19 as permitted sender) smtp.mail=sup-devel-bounces@rubyforge.org Received: from rubyforge.org (rubyforge.org [127.0.0.1]) by rubyforge.org (Postfix) with ESMTP id 6475219783CC; Wed, 29 Sep 2010 10:26:20 -0400 (EDT) Received: from smtp.chost.de (setoy.chost.de [217.160.209.225]) by rubyforge.org (Postfix) with ESMTP id 685421858377 for ; Wed, 29 Sep 2010 10:22:48 -0400 (EDT) Received: (qmail 25837 invoked by uid 5015); 29 Sep 2010 14:16:12 -0000 Received: (nullmailer pid 7624 invoked by uid 123); Wed, 29 Sep 2010 14:16:05 -0000 Received: from twin.sascha.silbe.org (twin.sascha.silbe.org [192.168.1.2]) by flatty.sascha.silbe.org ([192.168.1.252]) with SMTP via TCP; 29 Sep 2010 14:16:05 -0000 Received: (nullmailer pid 15906 invoked by uid 8193); Wed, 29 Sep 2010 14:16:05 -0000 From: Sascha Silbe To: sup-devel Date: Wed, 29 Sep 2010 16:16:02 +0200 Message-Id: <1285769762-15823-1-git-send-email-sascha-pgp@silbe.org> X-Mailer: git-send-email 1.7.1 Mail-Followup-To: Subject: [sup-devel] [PATCH] fix handling of multiple label: terms in search X-BeenThere: sup-devel@rubyforge.org X-Mailman-Version: 2.1.12 Precedence: list Reply-To: Sascha Silbe , Sup developer discussion List-Id: Sup developer discussion List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Sender: sup-devel-bounces@rubyforge.org Errors-To: sup-devel-bounces@rubyforge.org By default Xapian will join query terms with the same prefix with OR instead of AND, so searching for multiple labels doesn't return the expected results. By making use of a parameter to add_boolean_prefix (added in Xapian 1.2) we can tell Xapian to use OR only for the search terms that are guaranteed to be unique. Signed-off-by: Sascha Silbe --- lib/sup/index.rb | 74 +++++++++++++++++++++++++++--------------------------- 1 files changed, 37 insertions(+), 37 deletions(-) Tested on Debian Squeeze with Ruby 1.8.7.302 and Xapian 1.2.3. diff --git a/lib/sup/index.rb b/lib/sup/index.rb index 9273f18..a72bec6 100644 --- a/lib/sup/index.rb +++ b/lib/sup/index.rb @@ -419,8 +419,8 @@ EOS qp.stemming_strategy = Xapian::QueryParser::STEM_SOME qp.default_op = Xapian::Query::OP_AND qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true)) - NORMAL_PREFIX.each { |k,vs| vs.each { |v| qp.add_prefix k, v } } - BOOLEAN_PREFIX.each { |k,vs| vs.each { |v| qp.add_boolean_prefix k, v } } + NORMAL_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_prefix k, v } } + BOOLEAN_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_boolean_prefix k, v, info[:exclusive] } } begin xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD) @@ -471,31 +471,31 @@ EOS # Stemmed NORMAL_PREFIX = { - 'subject' => 'S', - 'body' => 'B', - 'from_name' => 'FN', - 'to_name' => 'TN', - 'name' => %w(FN TN), - 'attachment' => 'A', - 'email_text' => 'E', - '' => %w(S B FN TN A E), + 'subject' => {:prefix => 'S', :exclusive => false}, + 'body' => {:prefix => 'B', :exclusive => false}, + 'from_name' => {:prefix => 'FN', :exclusive => false}, + 'to_name' => {:prefix => 'TN', :exclusive => false}, + 'name' => {:prefix => %w(FN TN), :exclusive => false}, + 'attachment' => {:prefix => 'A', :exclusive => false}, + 'email_text' => {:prefix => 'E', :exclusive => false}, + '' => {:prefix => %w(S B FN TN A E), :exclusive => false}, } # Unstemmed BOOLEAN_PREFIX = { - 'type' => 'K', - 'from_email' => 'FE', - 'to_email' => 'TE', - 'email' => %w(FE TE), - 'date' => 'D', - 'label' => 'L', - 'source_id' => 'I', - 'attachment_extension' => 'O', - 'msgid' => 'Q', - 'id' => 'Q', - 'thread' => 'H', - 'ref' => 'R', - 'location' => 'J', + 'type' => {:prefix => 'K', :exclusive => true}, + 'from_email' => {:prefix => 'FE', :exclusive => false}, + 'to_email' => {:prefix => 'TE', :exclusive => false}, + 'email' => {:prefix => %w(FE TE), :exclusive => false}, + 'date' => {:prefix => 'D', :exclusive => true}, + 'label' => {:prefix => 'L', :exclusive => false}, + 'source_id' => {:prefix => 'I', :exclusive => true}, + 'attachment_extension' => {:prefix => 'O', :exclusive => false}, + 'msgid' => {:prefix => 'Q', :exclusive => true}, + 'id' => {:prefix => 'Q', :exclusive => true}, + 'thread' => {:prefix => 'H', :exclusive => false}, + 'ref' => {:prefix => 'R', :exclusive => false}, + 'location' => {:prefix => 'J', :exclusive => false}, } PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX @@ -661,8 +661,8 @@ EOS # Person names are indexed with several prefixes person_termer = lambda do |d| lambda do |p| - doc.index_text p.name, PREFIX["#{d}_name"] if p.name - doc.index_text p.email, PREFIX['email_text'] + doc.index_text p.name, PREFIX["#{d}_name"][:prefix] if p.name + doc.index_text p.email, PREFIX['email_text'][:prefix] doc.add_term mkterm(:email, d, p.email) end end @@ -673,9 +673,9 @@ EOS # Full text search content subject_text = m.indexable_subject body_text = m.indexable_body - doc.index_text subject_text, PREFIX['subject'] - doc.index_text body_text, PREFIX['body'] - m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] } + doc.index_text subject_text, PREFIX['subject'][:prefix] + doc.index_text body_text, PREFIX['body'][:prefix] + m.attachments.each { |a| doc.index_text a, PREFIX['attachment'][:prefix] } # Miscellaneous terms doc.add_term mkterm(:date, m.date) if m.date @@ -753,25 +753,25 @@ EOS def mkterm type, *args case type when :label - PREFIX['label'] + args[0].to_s.downcase + PREFIX['label'][:prefix] + args[0].to_s.downcase when :type - PREFIX['type'] + args[0].to_s.downcase + PREFIX['type'][:prefix] + args[0].to_s.downcase when :date - PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S") + PREFIX['date'][:prefix] + args[0].getutc.strftime("%Y%m%d%H%M%S") when :email case args[0] - when :from then PREFIX['from_email'] - when :to then PREFIX['to_email'] + when :from then PREFIX['from_email'][:prefix] + when :to then PREFIX['to_email'][:prefix] else raise "Invalid email term type #{args[0]}" end + args[1].to_s.downcase when :source_id - PREFIX['source_id'] + args[0].to_s.downcase + PREFIX['source_id'][:prefix] + args[0].to_s.downcase when :location - PREFIX['location'] + [args[0]].pack('n') + args[1].to_s + PREFIX['location'][:prefix] + [args[0]].pack('n') + args[1].to_s when :attachment_extension - PREFIX['attachment_extension'] + args[0].to_s.downcase + PREFIX['attachment_extension'][:prefix] + args[0].to_s.downcase when :msgid, :ref, :thread - PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)] + PREFIX[type.to_s][:prefix] + args[0][0...(MAX_TERM_LENGTH-1)] else raise "Invalid term type #{type}" end -- 1.7.1 _______________________________________________ Sup-devel mailing list Sup-devel@rubyforge.org http://rubyforge.org/mailman/listinfo/sup-devel