sup

A curses-based email client in the threads-with-tags style

sup.git

git clone https://supmua.dev/git/sup/
commit a606565c37df04102ecd0930ebeb6b8209834dce
parent 918ba611f8d6aac0bcc1cd6c027a00c8bc39d9b6
Author: William Morgan <wmorgan-sup@masanjin.net>
Date:   Sat, 24 May 2008 21:19:01 -0700

Merge commit 'origin/header-parsing-fix'

Diffstat:
M lib/sup/mbox.rb | 35 ++++++++++++++++++-----------------
A test/test_mbox_parsing.rb | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 132 insertions(+), 17 deletions(-)
diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb
@@ -11,6 +11,7 @@ module Redwood
 ## TODO: move functionality to somewhere better, like message.rb
 module MBox
   BREAK_RE = /^From \S+/
+  HEADER_RE = /\s*(.*?\S)\s*/
 
   def read_header f
     header = {}
@@ -20,26 +21,26 @@ module MBox
     ## when scanning over large mbox files.
     while(line = f.gets)
       case line
-      when /^(From):\s*(.*?)\s*$/i,
-        /^(To):\s*(.*?)\s*$/i,
-        /^(Cc):\s*(.*?)\s*$/i,
-        /^(Bcc):\s*(.*?)\s*$/i,
-        /^(Subject):\s*(.*?)\s*$/i,
-        /^(Date):\s*(.*?)\s*$/i,
-        /^(References):\s*(.*?)\s*$/i,
-        /^(In-Reply-To):\s*(.*?)\s*$/i,
-        /^(Reply-To):\s*(.*?)\s*$/i,
-        /^(List-Post):\s*(.*?)\s*$/i,
-        /^(List-Subscribe):\s*(.*?)\s*$/i,
-        /^(List-Unsubscribe):\s*(.*?)\s*$/i,
-        /^(Status):\s*(.*?)\s*$/i: header[last = $1] = $2
-      when /^(Message-Id):\s*(.*?)\s*$/i: header[mid_field = last = $1] = $2
+      when /^(From):#{HEADER_RE}$/i,
+        /^(To):#{HEADER_RE}$/i,
+        /^(Cc):#{HEADER_RE}$/i,
+        /^(Bcc):#{HEADER_RE}$/i,
+        /^(Subject):#{HEADER_RE}$/i,
+        /^(Date):#{HEADER_RE}$/i,
+        /^(References):#{HEADER_RE}$/i,
+        /^(In-Reply-To):#{HEADER_RE}$/i,
+        /^(Reply-To):#{HEADER_RE}$/i,
+        /^(List-Post):#{HEADER_RE}$/i,
+        /^(List-Subscribe):#{HEADER_RE}$/i,
+        /^(List-Unsubscribe):#{HEADER_RE}$/i,
+        /^(Status):#{HEADER_RE}$/i: header[last = $1] = $2
+      when /^(Message-Id):#{HEADER_RE}$/i: header[mid_field = last = $1] = $2
 
       ## these next three can occur multiple times, and we want the
       ## first one
-      when /^(Delivered-To):\s*(.*)$/i,
-        /^(X-Original-To):\s*(.*)$/i,
-        /^(Envelope-To):\s*(.*)$/i: header[last = $1] ||= $2
+      when /^(Delivered-To):#{HEADER_RE}$/i,
+        /^(X-Original-To):#{HEADER_RE}$/i,
+        /^(Envelope-To):#{HEADER_RE}$/i: header[last = $1] ||= $2
 
       when /^\r*$/: break
       when /^\S+:/: last = nil # some other header we don't care about
diff --git a/test/test_mbox_parsing.rb b/test/test_mbox_parsing.rb
@@ -0,0 +1,114 @@
+#!/usr/bin/ruby
+
+require 'test/unit'
+require 'sup'
+require 'stringio'
+
+include Redwood
+
+class TestMessage < Test::Unit::TestCase
+  def setup
+  end
+
+  def teardown
+  end
+
+  def test_normal_headers
+    h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+To: Sally <sally@sally.com>
+EOS
+
+    assert_equal "Bob <bob@bob.com>", h["From"]
+    assert_equal "Sally <sally@sally.com>", h["To"]
+    assert_nil h["Message-Id"]
+  end
+
+  ## stripping the angle brackets from Message-Id is poor behavior in retrospect, but it's built in now.
+  def test_message_id_stripping
+    h = MBox.read_header StringIO.new("Message-Id: <one@bob.com>\n")
+    assert_equal "one@bob.com", h["Message-Id"]
+
+    h = MBox.read_header StringIO.new("Message-Id: one@bob.com\n")
+    assert_equal "one@bob.com", h["Message-Id"]
+  end
+
+  def test_multiline
+    h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+Subject: one two three
+  four five six
+To: Sally <sally@sally.com>
+References: seven
+  eight
+Seven: Eight
+EOS
+
+    assert_equal "one two three four five six", h["Subject"]
+    assert_equal "Sally <sally@sally.com>", h["To"]
+    assert_equal "seven eight", h["References"]
+  end
+
+  def test_ignore_spacing
+    variants = [
+      "Subject:one two  three   end\n",
+      "Subject:    one two  three   end\n",
+      "Subject:   one two  three   end    \n",
+    ]
+    variants.each do |s|
+      h = MBox.read_header StringIO.new(s)
+      assert_equal "one two  three   end", h["Subject"]
+    end
+  end
+
+  def test_message_id_ignore_spacing
+    variants = [
+      "Message-Id:     <one@bob.com>       \n",
+      "Message-Id:      one@bob.com        \n",
+      "Message-Id:<one@bob.com>       \n",
+      "Message-Id:one@bob.com       \n",
+    ]
+    variants.each do |s|
+      h = MBox.read_header StringIO.new(s)
+      assert_equal "one@bob.com", h["Message-Id"]
+    end
+  end
+
+  def test_ignore_empty_lines
+    variants = [
+      "",
+      "Message-Id:       \n",
+      "Message-Id:\n",
+    ]
+    variants.each do |s|
+      h = MBox.read_header StringIO.new(s)
+      assert_nil h["Message-Id"]
+    end
+  end
+
+  def test_detect_end_of_headers
+    h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+
+To: a dear friend
+EOS
+  assert_equal "Bob <bob@bob.com>", h["From"]
+  assert_nil h["To"]
+
+  h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+\r
+To: a dear friend
+EOS
+  assert_equal "Bob <bob@bob.com>", h["From"]
+  assert_nil h["To"]
+
+  h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+\r\n\r
+To: a dear friend
+EOS
+  assert_equal "Bob <bob@bob.com>", h["From"]
+  assert_nil h["To"]
+  end
+end