commit 8897951bbf91d2b371b58b757ab6e83116f647e9
parent c9b542efbe0c8539250233dc1c238d5bc89da09f
Author: Eric Weikl <eric.weikl@tngtech.com>
Date: Sun, 10 Feb 2013 19:18:16 +0100
Make stemming language configurable.
This change allows you to configure the language used for stemming,
which should result in better search results for users with
predominantly non-English mail.
This change is backwards-compatible - the default stemming language
remains English, unless a specific language is set in your config.yaml.
To do so, add the key :stem_language to your config.yaml with one of
these values:
danish, dutch, english, finnish, french, german, hungarian, italian,
norwegian, portuguese, romanian, russian, spanish, swedish, turkish
IMPORTANT: after changing the language, you must re-index all of your
mail!
See http://xapian.org/docs/stemming.html for more information on how
Xapian performs stemming.
Diffstat:
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/lib/sup.rb b/lib/sup.rb
@@ -285,7 +285,8 @@ EOS
:poll_interval => 300,
:wrap_width => 0,
:slip_rows => 0,
- :col_jump => 2
+ :col_jump => 2,
+ :stem_language => "english"
}
if File.exists? filename
config = Redwood::load_yaml_obj filename
diff --git a/lib/sup/index.rb b/lib/sup/index.rb
@@ -25,7 +25,6 @@ module Redwood
class Index
include InteractiveLock
- STEM_LANGUAGE = "english"
INDEX_VERSION = '4'
## dates are converted to integers for xapian, and are used for document ids,
@@ -428,7 +427,7 @@ EOS
qp = Xapian::QueryParser.new
qp.database = @xapian
- qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
+ qp.stemmer = Xapian::Stem.new($config[:stem_language])
qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
qp.default_op = Xapian::Query::OP_AND
qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
@@ -804,7 +803,7 @@ class Xapian::Document
def index_text text, prefix, weight=1
term_generator = Xapian::TermGenerator.new
- term_generator.stemmer = Xapian::Stem.new(Redwood::Index::STEM_LANGUAGE)
+ term_generator.stemmer = Xapian::Stem.new($config[:stem_language])
term_generator.document = self
term_generator.index_text text, weight, prefix
end