commit d06b80220e17580c61750b6b29f02c0ca2a7bd57
parent 733defd3698de5ac416866499b6e7f907604dea8
Author: William Morgan <wmorgan-sup@masanjin.net>
Date: Tue, 28 Apr 2009 09:34:27 -0400
fix mbox splitting regexp
I dunno. This helps with the "From problem", but at the expense of being
more specific than the mbox spec really demands. I don't think there's a
really right solution, in general (due to the mbox format being a
fundamentally broken one), but I'm hoping this will work with all modern
mbox files.
Diffstat:
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb
@@ -10,7 +10,7 @@ module Redwood
##
## TODO: move functionality to somewhere better, like message.rb
module MBox
- BREAK_RE = /^From \S+/
+ BREAK_RE = /^From \S+@\S+ /
HEADER_RE = /\s*(.*?)\s*/
def read_header f
diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb
@@ -68,13 +68,12 @@ class Loader < Source
@mutex.synchronize do
@f.seek offset
begin
- RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
- m = RMail::Parser.read(input)
- if m.body && m.body.is_a?(String)
- m.body.gsub!(/^>From /, "From ")
- end
- return m
- end
+ ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
+ ## "From" at the start of a message body line.
+ string = ""
+ l = @f.gets
+ string << l until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ RMail::Parser.read string
rescue RMail::Parser::Error => e
raise FatalSourceError, "error parsing mbox file: #{e.message}"
end