commit 94407cd41003ed685b454aebd3131096424da393
parent 4f1795e4aba8dc607ddb4abf74f0b14b8ccb722c
Author: Rich Lane <rlane@club.cc.cmu.edu>
Date: Sun, 21 Mar 2010 17:53:12 -0700
move mbox source into mbox.rb
Diffstat:
| M |
lib/sup/mbox.rb |
| |
182 |
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
|
| D |
lib/sup/mbox/loader.rb |
| |
180 |
-------------------------------------------------------------------------------
|
2 files changed, 178 insertions(+), 184 deletions(-)
diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb
@@ -1,11 +1,182 @@
-require "sup/mbox/loader"
+require 'rmail'
+require 'uri'
+require 'set'
module Redwood
-module MBox
+class MBox < Source
BREAK_RE = /^From \S+ (.+)$/
- def is_break_line? l
+ include SerializeLabelsNicely
+ yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels
+
+ attr_reader :labels
+
+ ## uri_or_fp is horrific. need to refactor.
+ def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=nil
+ @mutex = Mutex.new
+ @labels = Set.new((labels || []) - LabelManager::RESERVED_LABELS)
+
+ case uri_or_fp
+ when String
+ uri = URI(Source.expand_filesystem_uri(uri_or_fp))
+ raise ArgumentError, "not an mbox uri" unless uri.scheme == "mbox"
+ raise ArgumentError, "mbox URI ('#{uri}') cannot have a host: #{uri.host}" if uri.host
+ raise ArgumentError, "mbox URI must have a path component" unless uri.path
+ @f = File.open uri.path, 'rb'
+ @path = uri.path
+ else
+ @f = uri_or_fp
+ @path = uri_or_fp.path
+ end
+
+ start_offset ||= 0
+ super uri_or_fp, start_offset, usual, archived, id
+ end
+
+ def file_path; @path end
+ def is_source_for? uri; super || (self.uri.is_a?(String) && (URI(Source.expand_filesystem_uri(uri)) == URI(Source.expand_filesystem_uri(self.uri)))) end
+
+ def self.suggest_labels_for path
+ ## heuristic: use the filename as a label, unless the file
+ ## has a path that probably represents an inbox.
+ if File.dirname(path) =~ /\b(var|usr|spool)\b/
+ []
+ else
+ [File.basename(path).downcase.intern]
+ end
+ end
+
+ def check
+ if (cur_offset ||= start_offset) > end_offset
+ raise OutOfSyncSourceError, "mbox file is smaller than last recorded message offset. Messages have probably been deleted by another client."
+ end
+ end
+
+ def start_offset; 0; end
+ def end_offset; File.size @f; end
+
+ def load_header offset
+ header = nil
+ @mutex.synchronize do
+ @f.seek offset
+ l = @f.gets
+ unless MBox::is_break_line? l
+ raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
+ end
+ header = parse_raw_email_header @f
+ end
+ header
+ end
+
+ def load_message offset
+ @mutex.synchronize do
+ @f.seek offset
+ begin
+ ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
+ ## "From" at the start of a message body line.
+ string = ""
+ l = @f.gets
+ string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
+ RMail::Parser.read string
+ rescue RMail::Parser::Error => e
+ raise FatalSourceError, "error parsing mbox file: #{e.message}"
+ end
+ end
+ end
+
+ ## scan forward until we're at the valid start of a message
+ def correct_offset!
+ @mutex.synchronize do
+ @f.seek cur_offset
+ string = ""
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
+ string << l
+ end
+ self.cur_offset += string.length
+ end
+ end
+
+ def raw_header offset
+ ret = ""
+ @mutex.synchronize do
+ @f.seek offset
+ until @f.eof? || (l = @f.gets) =~ /^\r*$/
+ ret << l
+ end
+ end
+ ret
+ end
+
+ def raw_message offset
+ ret = ""
+ each_raw_message_line(offset) { |l| ret << l }
+ ret
+ end
+
+ def store_message date, from_email, &block
+ need_blank = File.exists?(@filename) && !File.zero?(@filename)
+ File.open(@filename, "ab") do |f|
+ f.puts if need_blank
+ f.puts "From #{from_email} #{date.rfc2822}"
+ yield f
+ end
+ end
+
+ ## apparently it's a million times faster to call this directly if
+ ## we're just moving messages around on disk, than reading things
+ ## into memory with raw_message.
+ ##
+ ## i hoped never to have to move shit around on disk but
+ ## sup-sync-back has to do it.
+ def each_raw_message_line offset
+ @mutex.synchronize do
+ @f.seek offset
+ yield @f.gets
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
+ yield l
+ end
+ end
+ end
+
+ def next
+ returned_offset = nil
+ next_offset = cur_offset
+
+ begin
+ @mutex.synchronize do
+ @f.seek cur_offset
+
+ ## cur_offset could be at one of two places here:
+
+ ## 1. before a \n and a mbox separator, if it was previously at
+ ## EOF and a new message was added; or,
+ ## 2. at the beginning of an mbox separator (in all other
+ ## cases).
+
+ l = @f.gets or return nil
+ if l =~ /^\s*$/ # case 1
+ returned_offset = @f.tell
+ @f.gets # now we're at a BREAK_RE, so skip past it
+ else # case 2
+ returned_offset = cur_offset
+ ## we've already skipped past the BREAK_RE, so just go
+ end
+
+ while(line = @f.gets)
+ break if MBox::is_break_line? line
+ next_offset = @f.tell
+ end
+ end
+ rescue SystemCallError, IOError => e
+ raise FatalSourceError, "Error reading #{@f.path}: #{e.message}"
+ end
+
+ self.cur_offset = next_offset
+ [returned_offset, (labels + [:unread])]
+ end
+
+ def self.is_break_line? l
l =~ BREAK_RE or return false
time = $1
begin
@@ -17,6 +188,9 @@ module MBox
false
end
end
- module_function :is_break_line?
+
+ class Loader < self
+ yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels
+ end
end
end
diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb
@@ -1,180 +0,0 @@
-require 'rmail'
-require 'uri'
-require 'set'
-
-module Redwood
-module MBox
-
-class Loader < Source
- include SerializeLabelsNicely
- yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels
-
- attr_reader :labels
-
- ## uri_or_fp is horrific. need to refactor.
- def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=nil
- @mutex = Mutex.new
- @labels = Set.new((labels || []) - LabelManager::RESERVED_LABELS)
-
- case uri_or_fp
- when String
- uri = URI(Source.expand_filesystem_uri(uri_or_fp))
- raise ArgumentError, "not an mbox uri" unless uri.scheme == "mbox"
- raise ArgumentError, "mbox URI ('#{uri}') cannot have a host: #{uri.host}" if uri.host
- raise ArgumentError, "mbox URI must have a path component" unless uri.path
- @f = File.open uri.path, 'rb'
- @path = uri.path
- else
- @f = uri_or_fp
- @path = uri_or_fp.path
- end
-
- start_offset ||= 0
- super uri_or_fp, start_offset, usual, archived, id
- end
-
- def file_path; @path end
- def is_source_for? uri; super || (self.uri.is_a?(String) && (URI(Source.expand_filesystem_uri(uri)) == URI(Source.expand_filesystem_uri(self.uri)))) end
-
- def self.suggest_labels_for path
- ## heuristic: use the filename as a label, unless the file
- ## has a path that probably represents an inbox.
- if File.dirname(path) =~ /\b(var|usr|spool)\b/
- []
- else
- [File.basename(path).downcase.intern]
- end
- end
-
- def check
- if (cur_offset ||= start_offset) > end_offset
- raise OutOfSyncSourceError, "mbox file is smaller than last recorded message offset. Messages have probably been deleted by another client."
- end
- end
-
- def start_offset; 0; end
- def end_offset; File.size @f; end
-
- def load_header offset
- header = nil
- @mutex.synchronize do
- @f.seek offset
- l = @f.gets
- unless MBox::is_break_line? l
- raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
- end
- header = parse_raw_email_header @f
- end
- header
- end
-
- def load_message offset
- @mutex.synchronize do
- @f.seek offset
- begin
- ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
- ## "From" at the start of a message body line.
- string = ""
- l = @f.gets
- string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
- RMail::Parser.read string
- rescue RMail::Parser::Error => e
- raise FatalSourceError, "error parsing mbox file: #{e.message}"
- end
- end
- end
-
- ## scan forward until we're at the valid start of a message
- def correct_offset!
- @mutex.synchronize do
- @f.seek cur_offset
- string = ""
- until @f.eof? || MBox::is_break_line?(l = @f.gets)
- string << l
- end
- self.cur_offset += string.length
- end
- end
-
- def raw_header offset
- ret = ""
- @mutex.synchronize do
- @f.seek offset
- until @f.eof? || (l = @f.gets) =~ /^\r*$/
- ret << l
- end
- end
- ret
- end
-
- def raw_message offset
- ret = ""
- each_raw_message_line(offset) { |l| ret << l }
- ret
- end
-
- def store_message date, from_email, &block
- need_blank = File.exists?(@filename) && !File.zero?(@filename)
- File.open(@filename, "ab") do |f|
- f.puts if need_blank
- f.puts "From #{from_email} #{date.rfc2822}"
- yield f
- end
- end
-
- ## apparently it's a million times faster to call this directly if
- ## we're just moving messages around on disk, than reading things
- ## into memory with raw_message.
- ##
- ## i hoped never to have to move shit around on disk but
- ## sup-sync-back has to do it.
- def each_raw_message_line offset
- @mutex.synchronize do
- @f.seek offset
- yield @f.gets
- until @f.eof? || MBox::is_break_line?(l = @f.gets)
- yield l
- end
- end
- end
-
- def next
- returned_offset = nil
- next_offset = cur_offset
-
- begin
- @mutex.synchronize do
- @f.seek cur_offset
-
- ## cur_offset could be at one of two places here:
-
- ## 1. before a \n and a mbox separator, if it was previously at
- ## EOF and a new message was added; or,
- ## 2. at the beginning of an mbox separator (in all other
- ## cases).
-
- l = @f.gets or return nil
- if l =~ /^\s*$/ # case 1
- returned_offset = @f.tell
- @f.gets # now we're at a BREAK_RE, so skip past it
- else # case 2
- returned_offset = cur_offset
- ## we've already skipped past the BREAK_RE, so just go
- end
-
- while(line = @f.gets)
- break if MBox::is_break_line? line
- next_offset = @f.tell
- end
- end
- rescue SystemCallError, IOError => e
- raise FatalSourceError, "Error reading #{@f.path}: #{e.message}"
- end
-
- self.cur_offset = next_offset
- [returned_offset, (labels + [:unread])]
- end
-end
-
-end
-end