#!/usr/bin/env ruby

require "sup"

class Float
  # Renders every Float with exactly two digits after the decimal point.
  # Because this replaces the core Float#to_s, it applies wherever a Float
  # is converted to a string through to_s.
  def to_s
    format('%.2f', self)
  end
end

class Numeric
  # Formats the receiver, read as a number of seconds, as "H:MM:SS".
  # The value is truncated with to_i first, so fractional seconds are
  # dropped. Hours are not capped and may run to any number of digits.
  def to_time_s
    whole_minutes, seconds = to_i.divmod(60)
    hours, minutes = whole_minutes.divmod(60)
    format("%d:%02d:%02d", hours, minutes, seconds)
  end
end

# Runs the given block and returns how long it took, in seconds, as a Float.
#
# The duration is taken from the monotonic clock rather than Time.now, so
# the result cannot be skewed (or turn negative) when the wall clock is
# adjusted while the block is running.
def time
  began = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield
  Process.clock_gettime(Process::CLOCK_MONOTONIC) - began
end

# Prints the usage message to standard error and terminates the script
# via Kernel#exit (default, successful exit status). Never returns.
def educate_user
  $stderr.puts <<EOS
Loads messages into the Sup index, adding sources as needed to the
source list.

Usage:
  sup-import [options] <source>*
where <source>* is zero or more source descriptions (e.g., mbox
filenames on disk, or imap/imaps URIs).

If the sources listed are not already in the Sup source list,
they will be added to it, as parameterized by the following options:
  --archive: messages from these sources will not appear in the inbox
  --unusual: these sources will not be polled when the flag --the-usual
             is called

Regardless of whether the sources are new or not, they will be polled,
and any new messages will be added to the index, as parameterized by
the following options:
  --force-archive: regardless of the source "archive" flag, any new
                   messages found will not appear in the inbox.
  --force-read:    any messages found will not be marked as new.

The following options can also be specified:
  --the-usual:     import new messages from all usual sources
  --rebuild:       rebuild the index for the specified sources rather than
                   just adding new messages. Useful if the sources
                   have changed in any way *other* than new messages
                   being added.
  --force-rebuild: force a rebuild of all messages in the inbox, not just
                   ones that have changed. You probably won't need this
                   unless William changes the index format.
  --start-at <n>:  with --rebuild or --force-rebuild, seek each source to
                   offset <n> rather than resetting it.
  --optimize:      optimize the index after adding any new messages.
  --help:          don't do anything, just show this message.
EOS
  exit
end

# Removes "--start-at <n>" from +argv+ (mutating it) and returns <n> as an
# Integer, or nil when the flag is not present.
#
# Raises RuntimeError when the flag is the last argument or when its value
# is not made up entirely of decimal digits. The match is anchored so that
# values such as "12abc" or "abc1" are rejected instead of being silently
# coerced by to_i.
def extract_start_at(argv)
  pos = argv.index("--start-at")
  return nil unless pos

  value = argv[pos + 1]
  unless value && value =~ /\A\d+\z/
    raise "start-at requires a numeric argument: #{value.inspect}"
  end

  argv.slice!(pos, 2) # drop both the flag and its value
  value.to_i
end

educate_user if ARGV.member? '--help'

# Boolean flags. Array#delete returns the removed string when the flag was
# present (truthy) and nil otherwise, and strips the flag from ARGV so that
# only source descriptions remain afterwards.
archive = ARGV.delete "--archive"
unusual = ARGV.delete "--unusual"
force_archive = ARGV.delete "--force-archive"
force_read = ARGV.delete "--force-read"
the_usual = ARGV.delete "--the-usual"
rebuild = ARGV.delete "--rebuild"
force_rebuild = ARGV.delete "--force-rebuild"
optimize = ARGV.delete "--optimize"
start_at = extract_start_at(ARGV)

# Anything left that still looks like an option is unknown: complain and
# show the usage text (which exits).
if (o = ARGV.find { |x| x =~ /^--/ })
  $stderr.puts "error: unknown option #{o}"
  educate_user
end

# Open the index and report how many messages it currently holds.
puts "loading index..."
index = Redwood::Index.new
index.load
puts "loaded index of #{index.size} messages"

# Turn every remaining command-line argument into a source object.
# Arguments without a "scheme://" prefix are treated as mbox paths.
# Sources the index already knows are reused; unknown ones are created
# (prompting for credentials in the IMAP case) and registered.
sources = ARGV.map do |arg|
  uri = (arg =~ %r!://!) ? arg : "mbox://#{arg}"

  known = index.source_for uri
  next known if known

  created =
    if uri =~ %r!^imaps?://!
      print "Username for #{uri}: "
      username = $stdin.gets.chomp
      print "Password for #{uri} (warning: cleartext): "
      password = $stdin.gets.chomp
      Redwood::IMAP.new(uri, username, password, nil, !unusual, !!archive)
    else
      Redwood::MBox::Loader.new(uri, nil, !unusual, !!archive)
    end

  index.add_source created
  created
end

# With --the-usual, also poll every usual source known to the index,
# without listing any source twice.
if the_usual
  sources = (sources + index.usual_sources).uniq
end

# When rebuilding, reposition every source first: seek to the requested
# offset if --start-at was given, otherwise reset the source.
if rebuild || force_rebuild
  sources.each do |src|
    if start_at
      src.seek_to! start_at
    else
      src.reset!
    end
  end
end

# Main import pass: walk every source, build a Message for each entry the
# source yields, and either add it to the index or (when rebuilding)
# update the existing index entry. The index is saved in the ensure
# clause, so it is written out even if the loop is aborted by an exception.

# Message ids seen during this run. Used here to skip duplicates and by the
# rebuild cleanup further down the script to decide what to delete.
found = {}
start = Time.now
begin
  sources.each do |source|
    if source.broken?
      puts "error loading messages from #{source}: #{source.broken_msg}"
      next
    end
    next if source.done?
    puts "loading from #{source}... "
    # Number of messages added or updated from this source.
    num = 0
    # First offset yielded by this source; baseline for the progress estimate.
    start_offset = nil
    source.each do |offset, labels|
      start_offset ||= offset
      # --force-archive / --force-read strip the corresponding labels.
      labels -= [:inbox] if force_archive
      labels -= [:unread] if force_read
      begin
        m = Redwood::Message.new :source => source, :source_info => offset, :labels => labels
        if found[m.id]
          puts "skipping duplicate message #{m.id}"
          # Leaves this iteration of source.each entirely, so the progress
          # report below is skipped as well.
          next
        else
          found[m.id] = true
        end
        # Unless --force-read was given, drop :unread from messages whose
        # status is "RO".
        m.remove_label :unread if m.status == "RO" unless force_read
        puts "# message at #{offset}, labels: #{labels * ', '}" unless rebuild || force_rebuild
        # The parenthesized multiple assignment evaluates to whatever
        # load_entry_for_id returned; the trailing `&& entry` is what
        # actually checks that an index entry exists.
        if (rebuild || force_rebuild) && 
            (docid, entry = index.load_entry_for_id(m.id)) && entry
          # Already indexed: rewrite only when forced or when the stored
          # offset differs from the one the source reports now.
          if force_rebuild || entry[:source_info].to_i != offset
            puts "replacing message #{m.id} labels #{entry[:label].inspect} (offset #{entry[:source_info]} => #{offset})"
            # Keep the labels stored in the index entry (whitespace-separated
            # string converted to symbols) instead of the source's labels.
            m.labels = entry[:label].split.map { |l| l.intern }
            num += 1 if index.update_message m, source, offset
          end
        else
          # Not rebuilding, or no existing entry: add as a new message.
          # Counted only when add_message returns a truthy value.
          num += 1 if index.add_message m
        end
      rescue Redwood::MessageFormatError, Redwood::SourceError => e
        # A bad message is reported and skipped; the import continues.
        $stderr.puts "ignoring erroneous message at #{source}##{offset}: #{e.message}"
      end
      # Progress report whenever the count sits at a non-zero multiple of 1000.
      # NOTE(review): this is evaluated on every entry, so the line repeats
      # for as long as num stays at the same multiple of 1000 — confirm intended.
      # NOTE(review): `start` is taken once before all sources, while
      # start_offset is per source, so for later sources `elapsed` includes
      # time spent on earlier ones — confirm intended.
      if num % 1000 == 0 && num > 0
        elapsed = Time.now - start
        # Fraction of the offset range [start_offset, source.total] covered.
        pctdone = (offset.to_f - start_offset) / (source.total.to_f - start_offset)
        # Offsets still to go, multiplied by seconds spent per offset unit so far.
        remaining = (source.total.to_f - offset.to_f) * (elapsed.to_f / (offset.to_f - start_offset))
        puts "## #{num} (#{(pctdone * 100.0)}% done) read; #{elapsed.to_time_s} elapsed; est. #{remaining.to_time_s} remaining"
      end
    end
    puts "loaded #{num} messages" unless num == 0
  end
ensure
  index.save
end

# After a rebuild, remove index entries for the given sources whose message
# ids were not encountered during the import pass above. With --start-at,
# only entries whose source_info is at or beyond that value are examined.
if rebuild || force_rebuild
  puts "deleting missing messages from the index..."
  deleted = 0
  examined = 0
  sources.each do |source|
    raise "no source id for #{source}" unless source.id

    terms = ["+source_id:#{source.id}"]
    terms << "+source_info: >= #{start_at}" if start_at
    query = terms.join(" ")

    # The value returned by search_each is added to the running total that
    # is reported as the denominator below.
    examined += index.index.search_each(query, :limit => :all) do |docid, _score|
      message_id = index.index[docid][:message_id]
      next if found[message_id]
      puts "deleting #{message_id}"
      index.index.delete docid
      deleted += 1
    end
  end
  puts "deleted #{deleted} / #{examined} messages"
end

# With --optimize, optimize the underlying index and report how long it
# took, using the time helper defined earlier in this file.
if optimize
  puts "optimizing index..."
  seconds_taken = time do
    index.index.optimize
  end
  puts "optimized index of size #{index.size} in #{seconds_taken}s."
end
