commit a1669cab5edc11e2260898f4592e7f6a1c488544
parent b3d5d414554cbcff951e2497bb98957dead0fb8c
Author: William Morgan <wmorgan-sup@masanjin.net>
Date: Fri, 1 Jan 2010 10:35:36 -0800
Merge branch 'ruby-1.9-encoding' into next
Diffstat:
6 files changed, 68 insertions(+), 34 deletions(-)
diff --git a/lib/sup/maildir.rb b/lib/sup/maildir.rb
@@ -59,7 +59,7 @@ class Maildir < Source
File.stat(tmp_path)
rescue Errno::ENOENT #this is what we want.
begin
- File.open(tmp_path, 'w') do |f|
+ File.open(tmp_path, 'wb:BINARY') do |f|
yield f #provide a writable interface for the caller
f.fsync
end
@@ -207,7 +207,7 @@ private
def with_file_for id
fn = @ids_to_fns[id] or raise OutOfSyncSourceError, "No such id: #{id.inspect}."
begin
- File.open(fn) { |f| yield f }
+ File.open(fn, 'rb:BINARY') { |f| yield f }
rescue SystemCallError, IOError => e
raise FatalSourceError, "Problem reading file for id #{id.inspect}: #{fn.inspect}: #{e.message}."
end
diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb
@@ -22,7 +22,7 @@ class Loader < Source
raise ArgumentError, "not an mbox uri" unless uri.scheme == "mbox"
raise ArgumentError, "mbox URI ('#{uri}') cannot have a host: #{uri.host}" if uri.host
raise ArgumentError, "mbox URI must have a path component" unless uri.path
- @f = File.open uri.path
+ @f = File.open uri.path, 'rb:BINARY'
@path = uri.path
else
@f = uri_or_fp
@@ -114,7 +114,7 @@ class Loader < Source
def store_message date, from_email, &block
need_blank = File.exists?(@filename) && !File.zero?(@filename)
- File.open(@filename, "a") do |f|
+ File.open(@filename, "ab:BINARY") do |f|
f.puts if need_blank
f.puts "From #{from_email} #{date.rfc2822}"
yield f
diff --git a/lib/sup/message-chunks.rb b/lib/sup/message-chunks.rb
@@ -97,7 +97,7 @@ EOS
text = case @content_type
when /^text\/plain\b/
- Iconv.easy_decode $encoding, encoded_content.charset || $encoding, @raw_content
+ @raw_content
else
HookManager.run "mime-decode", :content_type => content_type,
:filename => lambda { write_to_disk },
@@ -107,6 +107,7 @@ EOS
@lines = nil
if text
+ text = text.transcode(encoded_content.charset || $encoding)
@lines = text.gsub("\r\n", "\n").gsub(/\t/, " ").gsub(/\r/, "").split("\n")
@quotable = true
end
diff --git a/lib/sup/message.rb b/lib/sup/message.rb
@@ -31,6 +31,7 @@ class Message
MAX_SIG_DISTANCE = 15 # lines from the end
DEFAULT_SUBJECT = ""
DEFAULT_SENDER = "(missing sender)"
+ MAX_HEADER_VALUE_SIZE = 4096
attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source,
:cc, :bcc, :labels, :attachments, :list_address, :recipient_email, :replyto,
@@ -59,13 +60,15 @@ class Message
#parse_header(opts[:header] || @source.load_header(@source_info))
end
- def parse_header header
- ## forcibly decode these headers from and to the current encoding,
- ## which serves to strip out characters that aren't displayable
- ## (and which would otherwise be screwing up the display)
- %w(from to subject cc bcc).each do |f|
- header[f] = Iconv.easy_decode($encoding, $encoding, header[f]) if header[f]
- end
+ def decode_header_field v # decode one raw header value into $encoding; returns nil for nil or oversized input
+ return unless v
+ return v unless v.is_a? String # non-String values are handed back unchanged
+ return unless v.size < MAX_HEADER_VALUE_SIZE # avoid regex blowup on spam; oversized values yield nil
+ Rfc2047.decode_to $encoding, Iconv.easy_decode($encoding, 'ASCII', v) # first pass treats the value as ASCII, then Rfc2047.decode_to gets the result
+ end
+
+ def parse_header encoded_header
+ header = SavingHash.new { |k| decode_header_field encoded_header[k] }
@id = if header["message-id"]
mid = header["message-id"] =~ /<(.+?)>/ ? $1 : header["message-id"]
@@ -100,7 +103,7 @@ class Message
Time.now
end
- @subj = header.member?("subject") ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
+ @subj = header["subject"] ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
@to = Person.from_address_list header["to"]
@cc = Person.from_address_list header["cc"]
@bcc = Person.from_address_list header["bcc"]
@@ -236,8 +239,9 @@ class Message
## bloat the index.
## actually, it's also the differentiation between to/cc/bcc,
## so i will keep this.
- parse_header @source.load_header(@source_info)
- message_to_chunks @source.load_message(@source_info)
+ rmsg = @source.load_message(@source_info)
+ parse_header rmsg.header
+ message_to_chunks rmsg
rescue SourceError, SocketError => e
warn "problem getting messages from #{@source}: #{e.message}"
## we need force_to_top here otherwise this window will cover
@@ -443,15 +447,12 @@ private
from = payload.header.from.first ? payload.header.from.first.format : ""
to = payload.header.to.map { |p| p.format }.join(", ")
cc = payload.header.cc.map { |p| p.format }.join(", ")
- subj = payload.header.subject
- subj = subj ? Message.normalize_subj(payload.header.subject.gsub(/\s+/, " ").gsub(/\s+$/, "")) : subj
- if Rfc2047.is_encoded? subj
- subj = Rfc2047.decode_to $encoding, subj
- end
+ subj = decode_header_field(payload.header.subject) || DEFAULT_SUBJECT
+ subj = Message.normalize_subj(subj.gsub(/\s+/, " ").gsub(/\s+$/, ""))
msgdate = payload.header.date
- from_person = from ? Person.from_address(from) : nil
- to_people = to ? Person.from_address_list(to) : nil
- cc_people = cc ? Person.from_address_list(cc) : nil
+ from_person = from ? Person.from_address(decode_header_field from) : nil
+ to_people = to ? Person.from_address_list(decode_header_field to) : nil
+ cc_people = cc ? Person.from_address_list(decode_header_field cc) : nil
[Chunk::EnclosedMessage.new(from_person, to_people, cc_people, msgdate, subj)] + message_to_chunks(payload, encrypted)
else
debug "no body for message/rfc822 enclosure; skipping"
diff --git a/lib/sup/modes/thread-view-mode.rb b/lib/sup/modes/thread-view-mode.rb
@@ -165,14 +165,14 @@ EOS
def show_header # show raw_header of the @message_lines entry at curpos in a TextMode
m = @message_lines[curpos] or return # no entry at curpos: do nothing
BufferManager.spawn_unless_exists("Full header for #{m.id}") do
- TextMode.new m.raw_header
+ TextMode.new m.raw_header.ascii # String#ascii spells every byte >= 0x80 as a \xNN escape
end
end
def show_message # show raw_message of the @message_lines entry at curpos in a TextMode
m = @message_lines[curpos] or return # no entry at curpos: do nothing
BufferManager.spawn_unless_exists("Raw message for #{m.id}") do
- TextMode.new m.raw_message
+ TextMode.new m.raw_message.ascii # String#ascii spells every byte >= 0x80 as a \xNN escape
end
end
diff --git a/lib/sup/util.rb b/lib/sup/util.rb
@@ -177,7 +177,7 @@ class String
## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using
## the utf8 regex and count those. otherwise, use the byte length.
def display_length
- if $encoding == "UTF-8" || $encoding == "utf8"
+ if RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
scan(/./u).size
else
size
@@ -296,6 +296,33 @@ class String
##
## split_on will be passed to String#split, so you can leave this nil for space.
def to_set_of_symbols split_on=nil; Set.new split(split_on).map { |x| x.strip.intern } end
+
+ class CheckError < ArgumentError; end # raised by String#check; rescued in Iconv.easy_decode
+ def check # raise CheckError unless self is validly encoded UTF-8 or ASCII; both tests are skipped when String lacks the encoding API
+ begin
+ fail "unexpected encoding #{encoding}" if respond_to?(:encoding) && !(encoding == Encoding::UTF_8 || encoding == Encoding::ASCII)
+ fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding? # right encoding tag but malformed bytes
+ rescue # bare rescue: catches the RuntimeError from fail (and any other StandardError)
+ raise CheckError.new($!.message) # re-raise under one class so callers can rescue String::CheckError
+ end
+ end
+
+ def ascii # return a new string in which every byte with the high bit set is written out as \x plus its hex value
+ out = ""
+ each_byte do |b|
+ if (b & 128) != 0 # high bit set, i.e. byte value >= 0x80
+ out << "\\x#{b.to_s 16}"
+ else
+ out << b.chr # 7-bit byte: copied through as-is
+ end
+ end
+ out.force_encoding Encoding::UTF_8 if out.respond_to? :force_encoding # only 7-bit bytes were appended; guard skips this where String has no force_encoding
+ out
+ end
+
+ def transcode src_encoding=$encoding # convert self from src_encoding (default: $encoding) to $encoding
+ Iconv.easy_decode $encoding, src_encoding, self # easy_decode warns and returns an escaped copy on failure
+ end
end
class Numeric
@@ -641,21 +668,26 @@ class FinishLine
end
class Iconv
- def self.easy_decode target, charset, text
- return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i
- charset = case charset
+ def self.easy_decode target, orig_charset, text # convert text from orig_charset to target; on failure warn and return text.ascii
+ if text.respond_to? :force_encoding # only where String has force_encoding
+ text = text.dup # work on a copy so the caller's string is not retagged
+ text.force_encoding Encoding::BINARY # retag the copy as raw bytes
+ end
+ charset = case orig_charset # normalize a few charset spellings before calling Iconv.iconv
when /UTF[-_ ]?8/i then "utf-8"
when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1"
when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15'
when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7"
- else charset
+ when /^euc$/i then 'EUC-JP' # XXX try them all?
+ when /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i then 'ASCII' # unknown / 7-bit labels are converted as ASCII
+ else orig_charset # anything else is passed to Iconv.iconv unchanged
end
begin
- Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
- rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence => e
- warn "couldn't transcode text from #{charset} to #{target} (\"#{text[0 ... 20]}\"...) (got #{e.message}); using original as is"
- text
+ returning(Iconv.iconv(target, charset, text + " ").join[0 .. -2]) { |str| str.check } # a space is appended before converting and the last char dropped after; NOTE(review): returning is defined elsewhere, presumably yields its argument and returns it, so String#check validates the result
+ rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence, String::CheckError # conversion errors and a failed String#check are handled alike
+ warn "couldn't transcode text from #{orig_charset} (#{charset}) to #{target}) (#{text[0 ... 20].inspect}...) (got #{$!.message} (#{$!.class}))"
+ text.ascii # fall back to the input with high-bit bytes escaped as \xNN
end
end
end