From mboxrd@z Thu Jan 1 00:00:00 1970 From: rlane@club.cc.cmu.edu (Rich Lane) Date: Sun, 16 Aug 2009 23:38:43 -0700 Subject: [sup-talk] [PATCH] index log Message-ID: <1250491123-19240-1-git-send-email-rlane@club.cc.cmu.edu> Add a YAML logfile that records changes to the index and modify sup-dump to use this rather than the normal database. The log is index format/version agnostic so that users can switch between incompatible Sup versions without running sup-dump first. This should also make automated backups easier. --- bin/sup-dump | 19 +++++++++++++------ lib/sup/ferret_index.rb | 7 +++++++ lib/sup/index.rb | 22 ++++++++++++++++++++++ lib/sup/xapian_index.rb | 7 ++++++- lib/sup/yaml_log.rb | 25 +++++++++++++++++++++++++ 5 files changed, 73 insertions(+), 7 deletions(-) create mode 100644 lib/sup/yaml_log.rb diff --git a/bin/sup-dump b/bin/sup-dump index ba36b21..531a30a 100755 --- a/bin/sup-dump +++ b/bin/sup-dump @@ -2,7 +2,8 @@ require 'rubygems' require 'trollop' -require "sup" +require 'sup' # Redwood::VERSION, Redwood::BASE_DIR +require "sup/yaml_log" $opts = Trollop::options do version "sup-dump (sup #{Redwood::VERSION})" @@ -21,10 +22,16 @@ No options. EOS end -index = Redwood::Index.new -Redwood::SourceManager.new -index.load +labels = {} -index.each_message :load_spam => true, :load_deleted => true, :load_killed => true do |m| - puts "#{m.id} (#{m.labels * ' '})" +Redwood::log "processing index log" +index_log = YamlLogReader.new File.join(Redwood::BASE_DIR, 'index_log.yaml') +index_log.each do |h| + case h['type'] + when 'add_message', 'update_message_state' + labels[h['id']] = h['labels'] + end end + +Redwood::log "dumping labels" +labels.each { |msgid,labels| puts "#{msgid} (#{labels * ' '})" } diff --git a/lib/sup/ferret_index.rb b/lib/sup/ferret_index.rb index 98ea9b5..2cb9759 100644 --- a/lib/sup/ferret_index.rb +++ b/lib/sup/ferret_index.rb @@ -57,6 +57,7 @@ EOS def sync_message m, opts={} entry = @index[m.id] + existed = !entry.nil? raise "no source info for message #{m.id}" unless m.source && m.source_info @@ -131,6 +132,12 @@ EOS } @index_mutex.synchronize do + if existed + @log.update_message_state m.id, m.labels + else + @log.add_message m.id, m.labels + end + @index.delete m.id @index.add_document d end diff --git a/lib/sup/index.rb b/lib/sup/index.rb index 54ec843..7360cf5 100644 --- a/lib/sup/index.rb +++ b/lib/sup/index.rb @@ -1,6 +1,7 @@ ## Index interface, subclassed by Ferret indexer. require 'fileutils' +require 'sup/yaml_log' begin require 'chronic' @@ -65,6 +66,7 @@ class BaseIndex def load SourceManager.load_sources + @log = IndexLogWriter.new File.join(@dir, 'index_log.yaml') load_index end @@ -176,6 +178,26 @@ class BaseIndex def parse_query s unimplemented end + + private + + class IndexLogWriter < YamlLogWriter + def update_message_state id, labels + write_entry 'update_message_state', 'id' => id, 'labels' => labels.map { |x| x.to_s } + end + + def add_message id, labels + write_entry 'add_message', 'id' => id, 'labels' => labels.map { |x| x.to_s } + end + + def remove_message id + write_entry 'remove_message', 'id' => id + end + + def write_entry type, hash + self << hash.merge('type' => type, 'time' => Time.now) + end + end end index_name = ENV['SUP_INDEX'] || $config[:index] || DEFAULT_INDEX diff --git a/lib/sup/xapian_index.rb b/lib/sup/xapian_index.rb index 18b5050..c4dbc5f 100644 --- a/lib/sup/xapian_index.rb +++ b/lib/sup/xapian_index.rb @@ -61,7 +61,10 @@ class XapianIndex < BaseIndex end def delete id - synchronize { @xapian.delete_document mkterm(:msgid, id) } + synchronize do + @log.remove_message id + @xapian.delete_document mkterm(:msgid, id) + end end def build_message id @@ -510,10 +513,12 @@ class XapianIndex < BaseIndex Redwood::log "warning: docid underflow, dropping #{m.id.inspect}" return end + @log.add_message m.id, m.labels else doc.clear_terms doc.clear_values docid = doc.docid + @log.update_message_state m.id, m.labels end @term_generator.document = doc diff --git a/lib/sup/yaml_log.rb b/lib/sup/yaml_log.rb new file mode 100644 index 0000000..325cca9 --- /dev/null +++ b/lib/sup/yaml_log.rb @@ -0,0 +1,25 @@ +class YamlLogReader + include Enumerable + + def initialize filename + @io = File.open(filename, 'r+') + end + + def each &b + @io.rewind + YAML.each_document @io, &b + end +end + +class YamlLogWriter + def initialize filename + @io = File.open(filename, 'a') + end + + def <<(o) + YAML.dump o, @io + + ## This only flushes to the OS. We may want to fsync occasionally too. + @io.flush + end +end -- 1.6.4