bin/sup-recover-sources (2673B) - raw
1 #!/usr/bin/env ruby
2
3 $:.unshift File.join(File.dirname(__FILE__), *%w[.. lib])
4
5 require 'optparse'
6
## Command-line defaults. The OptionParser callbacks below overwrite these
## entries in place when the matching switch is supplied.
$opts = {
  :unusual => false,
  :archive => false,
  :scan_num => 10,
}

## Continuation prefix for wrapping long option descriptions: a newline
## followed by 38 spaces, interpolated into the description strings below
## (presumably chosen to line up with OptionParser's description column).
OPTIONPARSERSUCKS = "\n" + " " * 38

## Build the parser first and run it separately, so the destructive parse
## below is an explicit, visible step.
option_parser = OptionParser.new do |opts|
  opts.banner = <<EOS
Usage: sup-recover-sources [options] <source>+

Rebuilds a lost sources.yaml file by reading messages from a list of
sources and determining, for each source, the most prevalent
'source_id' field of messages from that source in the index.

The only non-deterministic component to this is that if the same
message appears in multiple sources, those sources may be
mis-diagnosed by this program.

If the first N messages (--scan-num below) all have the same source_id
in the index, the source will be added to sources.yaml. Otherwise, the
distribution will be printed, and you will have to add it by hand.

The offset pointer into the sources will be set to the end of the source,
so you will have to run sup-import --rebuild for each new source after
doing this.

Options include:
EOS

  opts.on("--unusual", "Mark sources as 'unusual'. Only usual#{OPTIONPARSERSUCKS}sources will be polled by hand. Default:#{OPTIONPARSERSUCKS}#{$opts[:unusual]}.") { $opts[:unusual] = true }

  opts.on("--archive", "Mark sources as 'archive'. New messages#{OPTIONPARSERSUCKS}from these sources will not appear in#{OPTIONPARSERSUCKS}the inbox. Default: #{$opts[:archive]}.") { $opts[:archive] = true }

  ## The Integer argument makes OptionParser convert (and validate) N.
  opts.on("--scan-num N", Integer, "Number of messages to scan per source.#{OPTIONPARSERSUCKS}Default: #{$opts[:scan_num]}.") do |n|
    $opts[:scan_num] = n
  end

  opts.on_tail("-h", "--help", "Show this message") do
    puts opts
    exit
  end
end

## Parse destructively: parse! removes the recognized switches (and their
## arguments) from ARGV, so the ARGV.each loop later in this script sees only
## source names. The non-destructive parse(ARGV) left e.g. "--unusual" in
## ARGV, where it was then handled as if it were a source.
option_parser.parse!(ARGV)
51
## Bring up sup and load the message index before looking at any source.
require "sup"
Redwood.start
puts "loading index..."
index = Redwood::Index.init
index.load
puts "loaded index of #{index.size} messages"

## Treat every remaining command-line argument as an mbox to examine,
## skipping any for which SourceManager.source_for already returns a value.
ARGV.each do |path|
  next if Redwood::SourceManager.source_for(path)

  ## TODO: merge this code with the same snippet in import
  ## NOTE(review): the 3rd/4th arguments look like "usual" and "archived"
  ## flags -- confirm against Redwood::MBox#initialize.
  source = Redwood::MBox.new(path, nil, !$opts[:unusual], $opts[:archive])

  ## Count, per source id, how many scanned messages resolve to it.
  ## Messages that resolve to no source id are skipped and do not count
  ## toward the --scan-num limit.
  id_counts = Hash.new(0)
  scanned = 0
  source.each do |offset, _labels|
    message = Redwood::Message.new(:source => source, :source_info => offset)
    message.load_from_source!
    found = Redwood::SourceManager.source_for_id(message.id)
    if found
      id_counts[found] += 1
      scanned += 1
      break if scanned == $opts[:scan_num]
    end
  end

  ## Anything other than exactly one distinct id (including none at all)
  ## is reported so it can be resolved by hand.
  unless id_counts.size == 1
    puts ">> unable to determine #{source}: #{id_counts.inspect}"
    next
  end

  sole_id = id_counts.keys.first
  puts "assigned #{source} to #{sole_id}"
  source.id = sole_id.to_i
  Redwood::SourceManager.add_source(source)
end

index.save