#!/usr/bin/env ruby

require 'uri'
require 'rubygems'
require 'trollop'
require "sup"

## Make an unhandled exception in any thread abort the whole process, so
## failures in background threads are visible while debugging.
Thread.abort_on_exception = true # make debugging possible

class Float
  ## Render every Float with exactly two digits after the decimal point.
  def to_s
    format('%.2f', self)
  end
end

class Numeric
  ## Treat the receiver as a number of seconds (truncated with to_i) and
  ## format it as "H:MM:SS". Hours are not capped or zero-padded.
  def to_time_s
    total_minutes, seconds = to_i.divmod(60)
    hours, minutes = total_minutes.divmod(60)
    format("%d:%02d:%02d", hours, minutes, seconds)
  end
end

## Run the given block and return how long it took, in seconds, as the
## difference between two Time.now readings.
def time
  began_at = Time.now
  yield
  finished_at = Time.now
  finished_at - began_at
end

## Declare and parse the command-line options; the result is a hash-like
## object indexed by option symbol (e.g. opts[:rebuild]).
opts = Trollop::options do
  version "sup-import (sup #{Redwood::VERSION})"
  banner <<EOS
Imports messages into the Sup index from one or more sources.

Usage:
  sup-import [options] <source>*

where <source>* is zero or more source URIs or mbox filenames. If no
sources are given, imports messages from all sources marked as
"usual".

Options are:
EOS
  opt :archive, "Automatically archive any imported messages."
  opt :read, "Automatically mark as read any imported messages."
  opt :verbose, "Print message ids as they're processed."
  opt :optimize, "As the last stage of the import, optimize the index."
  text <<EOS

The following options allow sup-import to consider *all* messages in the
source, not just new ones:
EOS
  opt :rebuild, "Scan over the entire source and update the index to account for any messages that have been deleted, altered, or moved from another source."
  opt :full_rebuild, "Re-insert all messages in the source, not just ones that have changed or are new."
  opt :start_at, "For --rebuild and --full-rebuild, start at the given offset.", :type => :int
  opt :overwrite_state, "For --full-rebuild, overwrite the message state to the default state for that source, obeying --archive and --read if given."
end

## Cross-option validation. The first argument to Trollop::die must name an
## option declared above (there is no :force_rebuild option; the mutually
## exclusive pair is :full_rebuild / :rebuild).
Trollop::die :start_at, "must be non-negative" if (opts[:start_at] || 0) < 0
Trollop::die :start_at, "requires either --rebuild or --full-rebuild" if opts[:start_at] && !(opts[:rebuild] || opts[:full_rebuild])
Trollop::die :overwrite_state, "requires --full-rebuild" if opts[:overwrite_state] && !opts[:full_rebuild]
Trollop::die :full_rebuild, "cannot be specified with --rebuild" if opts[:full_rebuild] && opts[:rebuild]

## Bring up Redwood and load the index before resolving any sources.
Redwood::start
index = Redwood::Index.new
index.load

## Resolve each command-line argument to a source known to the index.
## Arguments without a "scheme://" part are treated as mbox filenames.
sources = ARGV.map do |arg|
  location = (arg =~ %r{://}) ? arg : "mbox://#{arg}"
  index.source_for(location) || raise("Unknown source: #{location}")
end

## With no explicit sources, fall back to the index's "usual" ones.
sources = index.usual_sources if sources.empty?

## For either kind of rebuild, reposition every source first: at the
## requested offset when --start-at was given, otherwise via reset!.
if opts[:rebuild] || opts[:full_rebuild]
  sources.each do |src|
    if opts[:start_at]
      src.seek_to! opts[:start_at]
    else
      src.reset!
    end
  end
end

## Main import pass. For every source, PollManager yields each message
## together with its offset and its existing index entry (nil when the
## message is not yet indexed). The block returns the message when it
## should be (re)written and nil otherwise -- presumably PollManager acts
## on that return value; confirm against PollManager.
##
## `found` records every message id seen, so the later cleanup pass can
## tell which indexed messages no longer exist in the sources.
last_update = start = Time.now
found = {}
begin
  sources.each do |source|
    num_scanned = 0
    num_added = 0
    num_updated = 0
    puts "Scanning #{source}..."
    Redwood::PollManager.add_new_messages_from source do |m, offset, entry|
      num_scanned += 1

      ## if the entry exists on disk
      if entry && !opts[:overwrite_state]
        m.labels = entry[:label].split(/\s+/).map { |x| x.intern }
      else
        ## m.labels defaults to labels from the source
        m.labels -= [:inbox] if opts[:archive]
        m.labels -= [:unread] if opts[:read]
      end

      ## print a progress report at most once a minute
      if Time.now - last_update > 60
        last_update = Time.now
        elapsed = last_update - start
        pctdone = source.respond_to?(:pct_done) ? source.pct_done : 100.0 * (source.cur_offset.to_f - source.start_offset).to_f / (source.end_offset - source.start_offset).to_f
        ## The estimate divides by the percentage done. At 0% (or NaN, for
        ## a zero-length source) the result is not finite and to_time_s
        ## would raise FloatDomainError, so report "unknown" instead.
        pct = pctdone.to_f
        remaining = (pct > 0 && pct.finite?) ? ((100.0 - pct) * (elapsed.to_f / pct)).to_time_s : "unknown"
        puts "## #{num_scanned} (#{pctdone}% done) read; #{elapsed.to_time_s} elapsed; est. #{remaining} remaining"
      end

      ## update if...
      if entry.nil? # it's a new message; or
        puts "Adding message at #{offset}, labels: #{m.labels * ' '}" if opts[:verbose]
        num_added += 1
        found[m.id] = true
        m
      elsif opts[:full_rebuild] || # we're updating everyone; or
          (opts[:rebuild] && (entry[:source_id].to_i != source.id || entry[:source_info].to_i != offset)) # we're updating just the changed ones
        puts "Updating message at #{offset} (from #{m.from.longname}, subject '#{m.subj}'), source #{entry[:source_id]} => #{source.id}, offset #{entry[:source_info]} => #{offset}, labels: {#{m.labels * ', '}}" if opts[:verbose]
        ## count each message id only once, even if it is seen again
        num_updated += 1 unless found[m.id]
        found[m.id] = true
        m
      else
        ## unchanged: remember we saw it, but leave the index alone
        found[m.id] = true
        nil
      end
    end
    puts "Added #{num_added}, updated #{num_updated} messages from #{source}."
  end
ensure
  ## always persist whatever was imported, even if the scan raised
  puts "Saving index and sources..."
  index.save
  Redwood::finish
end

## On a rebuild, purge index entries that claim to belong to one of the
## scanned sources but whose message id never turned up during the scan.
##
## This reaches straight into the Ferret API, which is not pretty.
##
## TODO: move this to Index, i suppose.
if opts[:rebuild] || opts[:full_rebuild]
  puts "Deleting missing messages from the index..."
  numdel = num = 0
  sources.each do |source|
    raise "no source id for #{source}" unless source.id

    ## restrict the search to this source, and to offsets at or past
    ## --start-at when one was given
    terms = ["+source_id:#{source.id}"]
    terms << "+source_info: >= #{opts[:start_at]}" if opts[:start_at]
    query = terms.join(" ")

    ## the return value of search_each is added to the running total
    hits = index.index.search_each(query, :limit => :all) do |docid, _score|
      message_id = index.index[docid][:message_id]
      unless found[message_id]
        puts "Deleting #{message_id}" if opts[:verbose]
        index.index.delete docid
        numdel += 1
      end
    end
    num += hits
  end
  puts "Deleted #{numdel} / #{num} messages"
end

## Optional final stage: optimize the underlying index and report how
## long the optimization took.
if opts[:optimize]
  puts "Optimizing index..."
  optt = time do
    index.index.optimize
  end
  puts "Optimized index of size #{index.size} in #{optt}s."
end
