sup

A curses-based, threads-with-tags-style email client

sup.git

git clone https://supmua.dev/git/sup/
commit a1669cab5edc11e2260898f4592e7f6a1c488544
parent b3d5d414554cbcff951e2497bb98957dead0fb8c
Author: William Morgan <wmorgan-sup@masanjin.net>
Date:   Fri,  1 Jan 2010 10:35:36 -0800

Merge branch 'ruby-1.9-encoding' into next

Diffstat:
M lib/sup/maildir.rb | 4 ++--
M lib/sup/mbox/loader.rb | 4 ++--
M lib/sup/message-chunks.rb | 3 ++-
M lib/sup/message.rb | 37 +++++++++++++++++++------------------
M lib/sup/modes/thread-view-mode.rb | 4 ++--
M lib/sup/util.rb | 50 +++++++++++++++++++++++++++++++++++++++++---------
6 files changed, 68 insertions(+), 34 deletions(-)
diff --git a/lib/sup/maildir.rb b/lib/sup/maildir.rb
@@ -59,7 +59,7 @@ class Maildir < Source
         File.stat(tmp_path)
       rescue Errno::ENOENT #this is what we want.
         begin
-          File.open(tmp_path, 'w') do |f|
+          File.open(tmp_path, 'wb:BINARY') do |f|
             yield f #provide a writable interface for the caller
             f.fsync
           end
@@ -207,7 +207,7 @@ private
   def with_file_for id
     fn = @ids_to_fns[id] or raise OutOfSyncSourceError, "No such id: #{id.inspect}."
     begin
-      File.open(fn) { |f| yield f }
+      File.open(fn, 'rb:BINARY') { |f| yield f }
     rescue SystemCallError, IOError => e
       raise FatalSourceError, "Problem reading file for id #{id.inspect}: #{fn.inspect}: #{e.message}."
     end
diff --git a/lib/sup/mbox/loader.rb b/lib/sup/mbox/loader.rb
@@ -22,7 +22,7 @@ class Loader < Source
       raise ArgumentError, "not an mbox uri" unless uri.scheme == "mbox"
       raise ArgumentError, "mbox URI ('#{uri}') cannot have a host: #{uri.host}" if uri.host
       raise ArgumentError, "mbox URI must have a path component" unless uri.path
-      @f = File.open uri.path
+      @f = File.open uri.path, 'rb:BINARY'
       @path = uri.path
     else
       @f = uri_or_fp
@@ -114,7 +114,7 @@ class Loader < Source
 
   def store_message date, from_email, &block
     need_blank = File.exists?(@filename) && !File.zero?(@filename)
-    File.open(@filename, "a") do |f|
+    File.open(@filename, "ab:BINARY") do |f|
       f.puts if need_blank
       f.puts "From #{from_email} #{date.rfc2822}"
       yield f
diff --git a/lib/sup/message-chunks.rb b/lib/sup/message-chunks.rb
@@ -97,7 +97,7 @@ EOS
 
       text = case @content_type
       when /^text\/plain\b/
-        Iconv.easy_decode $encoding, encoded_content.charset || $encoding, @raw_content
+        @raw_content
       else
         HookManager.run "mime-decode", :content_type => content_type,
                         :filename => lambda { write_to_disk },
@@ -107,6 +107,7 @@ EOS
 
       @lines = nil
       if text
+        text = text.transcode(encoded_content.charset || $encoding)
         @lines = text.gsub("\r\n", "\n").gsub(/\t/, "        ").gsub(/\r/, "").split("\n")
         @quotable = true
       end
diff --git a/lib/sup/message.rb b/lib/sup/message.rb
@@ -31,6 +31,7 @@ class Message
   MAX_SIG_DISTANCE = 15 # lines from the end
   DEFAULT_SUBJECT = ""
   DEFAULT_SENDER = "(missing sender)"
+  MAX_HEADER_VALUE_SIZE = 4096
 
   attr_reader :id, :date, :from, :subj, :refs, :replytos, :to, :source,
               :cc, :bcc, :labels, :attachments, :list_address, :recipient_email, :replyto,
@@ -59,13 +60,15 @@ class Message
     #parse_header(opts[:header] || @source.load_header(@source_info))
   end
 
-  def parse_header header
-    ## forcibly decode these headers from and to the current encoding,
-    ## which serves to strip out characters that aren't displayable
-    ## (and which would otherwise be screwing up the display)
-    %w(from to subject cc bcc).each do |f|
-      header[f] = Iconv.easy_decode($encoding, $encoding, header[f]) if header[f]
-    end
+  def decode_header_field v
+    return unless v
+    return v unless v.is_a? String
+    return unless v.size < MAX_HEADER_VALUE_SIZE # avoid regex blowup on spam
+    Rfc2047.decode_to $encoding, Iconv.easy_decode($encoding, 'ASCII', v)
+  end
+
+  def parse_header encoded_header
+    header = SavingHash.new { |k| decode_header_field encoded_header[k] }
 
     @id = if header["message-id"]
       mid = header["message-id"] =~ /<(.+?)>/ ? $1 : header["message-id"]
@@ -100,7 +103,7 @@ class Message
       Time.now
     end
 
-    @subj = header.member?("subject") ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
+    @subj = header["subject"] ? header["subject"].gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
     @to = Person.from_address_list header["to"]
     @cc = Person.from_address_list header["cc"]
     @bcc = Person.from_address_list header["bcc"]
@@ -236,8 +239,9 @@ class Message
           ## bloat the index.
           ## actually, it's also the differentiation between to/cc/bcc,
           ## so i will keep this.
-          parse_header @source.load_header(@source_info)
-          message_to_chunks @source.load_message(@source_info)
+          rmsg = @source.load_message(@source_info)
+          parse_header rmsg.header
+          message_to_chunks rmsg
         rescue SourceError, SocketError => e
           warn "problem getting messages from #{@source}: #{e.message}"
           ## we need force_to_top here otherwise this window will cover
@@ -443,15 +447,12 @@ private
         from = payload.header.from.first ? payload.header.from.first.format : ""
         to = payload.header.to.map { |p| p.format }.join(", ")
         cc = payload.header.cc.map { |p| p.format }.join(", ")
-        subj = payload.header.subject
-        subj = subj ? Message.normalize_subj(payload.header.subject.gsub(/\s+/, " ").gsub(/\s+$/, "")) : subj
-        if Rfc2047.is_encoded? subj
-          subj = Rfc2047.decode_to $encoding, subj
-        end
+        subj = decode_header_field(payload.header.subject) || DEFAULT_SUBJECT
+        subj = Message.normalize_subj(subj.gsub(/\s+/, " ").gsub(/\s+$/, ""))
         msgdate = payload.header.date
-        from_person = from ? Person.from_address(from) : nil
-        to_people = to ? Person.from_address_list(to) : nil
-        cc_people = cc ? Person.from_address_list(cc) : nil
+        from_person = from ? Person.from_address(decode_header_field from) : nil
+        to_people = to ? Person.from_address_list(decode_header_field to) : nil
+        cc_people = cc ? Person.from_address_list(decode_header_field cc) : nil
         [Chunk::EnclosedMessage.new(from_person, to_people, cc_people, msgdate, subj)] + message_to_chunks(payload, encrypted)
       else
         debug "no body for message/rfc822 enclosure; skipping"
diff --git a/lib/sup/modes/thread-view-mode.rb b/lib/sup/modes/thread-view-mode.rb
@@ -165,14 +165,14 @@ EOS
   def show_header
     m = @message_lines[curpos] or return
     BufferManager.spawn_unless_exists("Full header for #{m.id}") do
-      TextMode.new m.raw_header
+      TextMode.new m.raw_header.ascii
     end
   end
 
   def show_message
     m = @message_lines[curpos] or return
     BufferManager.spawn_unless_exists("Raw message for #{m.id}") do
-      TextMode.new m.raw_message
+      TextMode.new m.raw_message.ascii
     end
   end
 
diff --git a/lib/sup/util.rb b/lib/sup/util.rb
@@ -177,7 +177,7 @@ class String
   ## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using
   ## the utf8 regex and count those. otherwise, use the byte length.
   def display_length
-    if $encoding == "UTF-8" || $encoding == "utf8"
+    if RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
       scan(/./u).size
     else
       size
@@ -296,6 +296,33 @@ class String
   ##
   ## split_on will be passed to String#split, so you can leave this nil for space.
   def to_set_of_symbols split_on=nil; Set.new split(split_on).map { |x| x.strip.intern } end
+
+  class CheckError < ArgumentError; end
+  def check
+    begin
+      fail "unexpected encoding #{encoding}" if respond_to?(:encoding) && !(encoding == Encoding::UTF_8 || encoding == Encoding::ASCII)
+      fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding?
+    rescue
+      raise CheckError.new($!.message)
+    end
+  end
+
+  def ascii
+    out = ""
+    each_byte do |b|
+      if (b & 128) != 0
+        out << "\\x#{b.to_s 16}"
+      else
+        out << b.chr
+      end
+    end
+    out.force_encoding Encoding::UTF_8 if out.respond_to? :force_encoding
+    out
+  end
+
+  def transcode src_encoding=$encoding
+    Iconv.easy_decode $encoding, src_encoding, self
+  end
 end
 
 class Numeric
@@ -641,21 +668,26 @@ class FinishLine
 end
 
 class Iconv
-  def self.easy_decode target, charset, text
-    return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i
-    charset = case charset
+  def self.easy_decode target, orig_charset, text
+    if text.respond_to? :force_encoding
+      text = text.dup
+      text.force_encoding Encoding::BINARY
+    end
+    charset = case orig_charset
       when /UTF[-_ ]?8/i then "utf-8"
       when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1"
       when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15'
       when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7"
-      else charset
+      when /^euc$/i then 'EUC-JP' # XXX try them all?
+      when /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i then 'ASCII'
+      else orig_charset
     end
 
     begin
-      Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
-    rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence => e
-      warn "couldn't transcode text from #{charset} to #{target} (\"#{text[0 ... 20]}\"...) (got #{e.message}); using original as is"
-      text
+      returning(Iconv.iconv(target, charset, text + " ").join[0 .. -2]) { |str| str.check }
+    rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence, String::CheckError
+      warn "couldn't transcode text from #{orig_charset} (#{charset}) to #{target}) (#{text[0 ... 20].inspect}...) (got #{$!.message} (#{$!.class}))"
+      text.ascii
     end
   end
 end