sup

A curses threads-with-tags style email client

sup.git

git clone https://supmua.dev/git/sup/
commit 47133afaad9c0a0eb34cf1b4fb77a251388a4359
parent d41f2472b3c45b3fbaaa9e02cc7ac62c4295c575
Author: William Morgan <wmorgan-sup@masanjin.net>
Date:   Mon, 18 May 2009 07:10:27 -0700

Merge branch 'various-mbox-fixes' into next

Conflicts:

	lib/sup/mbox.rb
	test/test_mbox_parsing.rb

Diffstat:
M lib/sup/mbox.rb | 16 +++++++++++++++-
M lib/sup/mbox/loader.rb | 14 +++++++-------
M test/test_header_parsing.rb | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 72 insertions(+), 8 deletions(-)
diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb
@@ -6,6 +6,20 @@ require "sup/rfc2047"
 module Redwood
 
 module MBox
-  BREAK_RE = /^From \S+@\S+ /
+  BREAK_RE = /^From \S+ (.+)$/
+
+  def is_break_line? l
+    l =~ BREAK_RE or return false
+    time = $1
+    begin
+      ## hack -- make Time.parse fail when trying to substitute values from Time.now
+      Time.parse time, 0
+      true
+    rescue NoMethodError
+      Redwood::log "found invalid date in potential mbox split line, not splitting: #{l.inspect}"
+      false
+    end
+  end
+  module_function :is_break_line?
 end
 end
diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb
@@ -9,7 +9,7 @@ class Loader < Source
   attr_accessor :labels
 
   ## uri_or_fp is horrific. need to refactor.
-  def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=[]
+  def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=[]
     @mutex = Mutex.new
     @labels = ((labels || []) - LabelManager::RESERVED_LABELS).uniq.freeze
 
@@ -56,7 +56,7 @@ class Loader < Source
     @mutex.synchronize do
       @f.seek offset
       l = @f.gets
-      unless l =~ BREAK_RE
+      unless MBox::is_break_line? l
         raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}." 
       end
       header = parse_raw_email_header @f
@@ -72,7 +72,7 @@ class Loader < Source
         ## "From" at the start of a message body line.
         string = ""
         l = @f.gets
-        string << l until @f.eof? || (l = @f.gets) =~ BREAK_RE
+        string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
         RMail::Parser.read string
       rescue RMail::Parser::Error => e
         raise FatalSourceError, "error parsing mbox file: #{e.message}"
@@ -85,7 +85,7 @@ class Loader < Source
     @mutex.synchronize do
       @f.seek cur_offset
       string = ""
-      until @f.eof? || (l = @f.gets) =~ BREAK_RE
+      until @f.eof? || MBox::is_break_line?(l = @f.gets)
         string << l
       end
       self.cur_offset += string.length
@@ -119,7 +119,7 @@ class Loader < Source
     @mutex.synchronize do
       @f.seek offset
       yield @f.gets
-      until @f.eof? || (l = @f.gets) =~ BREAK_RE
+      until @f.eof? || MBox::is_break_line?(l = @f.gets)
         yield l
       end
     end
@@ -140,7 +140,7 @@ class Loader < Source
         ## 2. at the beginning of an mbox separator (in all other
         ##    cases).
 
-        l = @f.gets or raise "next while at EOF"
+        l = @f.gets or return nil
         if l =~ /^\s*$/ # case 1
           returned_offset = @f.tell
           @f.gets # now we're at a BREAK_RE, so skip past it
@@ -150,7 +150,7 @@ class Loader < Source
         end
 
         while(line = @f.gets)
-          break if line =~ BREAK_RE
+          break if MBox::is_break_line? line
           next_offset = @f.tell
         end
       end
diff --git a/test/test_header_parsing.rb b/test/test_header_parsing.rb
@@ -104,4 +104,54 @@ EOS
   assert_equal "Bob <bob@bob.com>", h["from"]
   assert_nil h["to"]
   end
+
+  def test_from_line_splitting
+    l = MBox::Loader.new StringIO.new(<<EOS)
+From sup-talk-bounces@rubyforge.org Mon Apr 27 12:56:18 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello there friend. How are you?
+
+From sea to shining sea
+
+From bob@bob.com I get only spam.
+
+From bob@bob.com   
+
+From bob@bob.com
+
+(that second one has spaces at the endj
+
+This is the end of the email.
+EOS
+    offset, labels = l.next
+    assert_equal 0, offset
+    offset, labels = l.next
+    assert_nil offset
+  end
+
+  def test_more_from_line_splitting
+    l = MBox::Loader.new StringIO.new(<<EOS)
+From sup-talk-bounces@rubyforge.org Mon Apr 27 12:56:18 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello there friend. How are you?
+
+From bob@bob.com Mon Apr 27 12:56:19 2009
+From: Bob <bob@bob.com>
+To: a dear friend
+
+Hello again! Would you like to buy my products?
+EOS
+    offset, labels = l.next
+    assert_not_nil offset
+
+    offset, labels = l.next
+    assert_not_nil offset
+
+    offset, labels = l.next
+    assert_nil offset
+  end
 end