commit a606565c37df04102ecd0930ebeb6b8209834dce
parent 918ba611f8d6aac0bcc1cd6c027a00c8bc39d9b6
Author: William Morgan <wmorgan-sup@masanjin.net>
Date: Sat, 24 May 2008 21:19:01 -0700
Merge commit 'origin/header-parsing-fix'
Diffstat:
2 files changed, 132 insertions(+), 17 deletions(-)
diff --git a/lib/sup/mbox.rb b/lib/sup/mbox.rb
@@ -11,6 +11,7 @@ module Redwood
## TODO: move functionality to somewhere better, like message.rb
module MBox
BREAK_RE = /^From \S+/
+ HEADER_RE = /\s*(.*?\S)\s*/
def read_header f
header = {}
@@ -20,26 +21,26 @@ module MBox
## when scanning over large mbox files.
while(line = f.gets)
case line
- when /^(From):\s*(.*?)\s*$/i,
- /^(To):\s*(.*?)\s*$/i,
- /^(Cc):\s*(.*?)\s*$/i,
- /^(Bcc):\s*(.*?)\s*$/i,
- /^(Subject):\s*(.*?)\s*$/i,
- /^(Date):\s*(.*?)\s*$/i,
- /^(References):\s*(.*?)\s*$/i,
- /^(In-Reply-To):\s*(.*?)\s*$/i,
- /^(Reply-To):\s*(.*?)\s*$/i,
- /^(List-Post):\s*(.*?)\s*$/i,
- /^(List-Subscribe):\s*(.*?)\s*$/i,
- /^(List-Unsubscribe):\s*(.*?)\s*$/i,
- /^(Status):\s*(.*?)\s*$/i: header[last = $1] = $2
- when /^(Message-Id):\s*(.*?)\s*$/i: header[mid_field = last = $1] = $2
+ when /^(From):#{HEADER_RE}$/i,
+ /^(To):#{HEADER_RE}$/i,
+ /^(Cc):#{HEADER_RE}$/i,
+ /^(Bcc):#{HEADER_RE}$/i,
+ /^(Subject):#{HEADER_RE}$/i,
+ /^(Date):#{HEADER_RE}$/i,
+ /^(References):#{HEADER_RE}$/i,
+ /^(In-Reply-To):#{HEADER_RE}$/i,
+ /^(Reply-To):#{HEADER_RE}$/i,
+ /^(List-Post):#{HEADER_RE}$/i,
+ /^(List-Subscribe):#{HEADER_RE}$/i,
+ /^(List-Unsubscribe):#{HEADER_RE}$/i,
+ /^(Status):#{HEADER_RE}$/i: header[last = $1] = $2
+ when /^(Message-Id):#{HEADER_RE}$/i: header[mid_field = last = $1] = $2
## these next three can occur multiple times, and we want the
## first one
- when /^(Delivered-To):\s*(.*)$/i,
- /^(X-Original-To):\s*(.*)$/i,
- /^(Envelope-To):\s*(.*)$/i: header[last = $1] ||= $2
+ when /^(Delivered-To):#{HEADER_RE}$/i,
+ /^(X-Original-To):#{HEADER_RE}$/i,
+ /^(Envelope-To):#{HEADER_RE}$/i: header[last = $1] ||= $2
when /^\r*$/: break
when /^\S+:/: last = nil # some other header we don't care about
diff --git a/test/test_mbox_parsing.rb b/test/test_mbox_parsing.rb
@@ -0,0 +1,114 @@
+#!/usr/bin/ruby
+
+require 'test/unit'
+require 'sup'
+require 'stringio'
+
+include Redwood
+
+class TestMessage < Test::Unit::TestCase ## MBox.read_header tests (NOTE(review): class name differs from file test_mbox_parsing.rb)
+ def setup ## no shared fixtures: every test feeds read_header its own StringIO
+ end
+
+ def teardown ## nothing to clean up
+ end
+
+ def test_normal_headers ## headers present in the input are returned by name; an absent Message-Id is nil
+ h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+To: Sally <sally@sally.com>
+EOS
+
+ assert_equal "Bob <bob@bob.com>", h["From"]
+ assert_equal "Sally <sally@sally.com>", h["To"]
+ assert_nil h["Message-Id"]
+ end
+
+ ## a Message-Id is expected back without its angle brackets. poor behavior in retrospect, but it's built in now.
+ def test_message_id_stripping
+ h = MBox.read_header StringIO.new("Message-Id: <one@bob.com>\n")
+ assert_equal "one@bob.com", h["Message-Id"]
+
+ h = MBox.read_header StringIO.new("Message-Id: one@bob.com\n") ## no brackets in the input: value is unchanged
+ assert_equal "one@bob.com", h["Message-Id"]
+ end
+
+ def test_multiline ## a continuation line is expected to be joined onto the previous header's value with a space
+ h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+Subject: one two three
+ four five six
+To: Sally <sally@sally.com>
+References: seven
+ eight
+Seven: Eight
+EOS
+
+ assert_equal "one two three four five six", h["Subject"]
+ assert_equal "Sally <sally@sally.com>", h["To"] ## a header following a continued one is still parsed on its own
+ assert_equal "seven eight", h["References"]
+ end
+
+ def test_ignore_spacing ## whitespace after the colon and at the end of the line is not part of the value
+ variants = [
+ "Subject:one two three end\n",
+ "Subject: one two three end\n",
+ "Subject: one two three end \n",
+ ]
+ variants.each do |s|
+ h = MBox.read_header StringIO.new(s)
+ assert_equal "one two three end", h["Subject"]
+ end
+ end
+
+ def test_message_id_ignore_spacing ## same spacing tolerance for Message-Id, with and without angle brackets
+ variants = [
+ "Message-Id: <one@bob.com> \n",
+ "Message-Id: one@bob.com \n",
+ "Message-Id:<one@bob.com> \n",
+ "Message-Id:one@bob.com \n",
+ ]
+ variants.each do |s|
+ h = MBox.read_header StringIO.new(s)
+ assert_equal "one@bob.com", h["Message-Id"]
+ end
+ end
+
+ def test_ignore_empty_lines ## empty input, or a Message-Id header with no value, must leave Message-Id unset
+ variants = [
+ "",
+ "Message-Id: \n",
+ "Message-Id:\n",
+ ]
+ variants.each do |s|
+ h = MBox.read_header StringIO.new(s)
+ assert_nil h["Message-Id"]
+ end
+ end
+
+ def test_detect_end_of_headers ## a blank line (empty or only \r) ends the headers; the To: after it must not be read
+ h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+
+To: a dear friend
+EOS
+ assert_equal "Bob <bob@bob.com>", h["From"]
+ assert_nil h["To"]
+
+ h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+\r
+To: a dear friend
+EOS
+ assert_equal "Bob <bob@bob.com>", h["From"]
+ assert_nil h["To"]
+
+ h = MBox.read_header StringIO.new(<<EOS)
+From: Bob <bob@bob.com>
+\r\n\r
+To: a dear friend
+EOS
+ assert_equal "Bob <bob@bob.com>", h["From"]
+ assert_nil h["To"]
+ end
+end