sup

A curses threads-with-tags style email client

sup.git

git clone https://supmua.dev/git/sup/

lib/sup/message.rb (27223B) - raw

      1 # encoding: UTF-8
      2 
      3 require 'time'
      4 require 'string-scrub' if /^2\.0\./ =~ RUBY_VERSION
      5 
      6 module Redwood
      7 
      8 ## a Message is what's threaded.
      9 ##
     10 ## it is also where the parsing for quotes and signatures is done, but
     11 ## that should be moved out to a separate class at some point (because
     12 ## i would like, for example, to be able to add in a ruby-talk
     13 ## specific module that would detect and link to /ruby-talk:\d+/
     14 ## sequences in the text of an email. (how sweet would that be?)
     15 
     16 class Message
     17   SNIPPET_LEN = 80
     18   RE_PATTERN = /^((re|re[\[\(]\d[\]\)]):\s*)+/i
     19 
     20   ## some utility methods
     21   class << self
     22     def normalize_subj s; s.gsub(RE_PATTERN, ""); end
     23     def subj_is_reply? s; s =~ RE_PATTERN; end
     24     def reify_subj s; subj_is_reply?(s) ? s : "Re: " + s; end
     25   end
     26 
  ## line-classification patterns used by text_to_chunks.
  ## QUOTE_PATTERN: up to four leading whitespace characters followed by
  ## one of ">", "|" or "}".
  QUOTE_PATTERN = /^\s{0,4}[>|\}]/
  ## BLOCK_QUOTE_PATTERN: a "-----Original Message-----" style separator.
  BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/
  ## SIG_PATTERN: signature delimiters: "-- " (optionally dash-escaped),
  ## long runs of dashes or underscores, and two literal marker prefixes.
  SIG_PATTERN = /(^(- )*-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/

  ## armor marker lines looked up by inline_gpg_to_chunks.
  GPG_SIGNED_START = "-----BEGIN PGP SIGNED MESSAGE-----"
  GPG_SIGNED_END = "-----END PGP SIGNED MESSAGE-----"
  GPG_START = "-----BEGIN PGP MESSAGE-----"
  GPG_END = "-----END PGP MESSAGE-----"
  GPG_SIG_START = "-----BEGIN PGP SIGNATURE-----"
  GPG_SIG_END = "-----END PGP SIGNATURE-----"

  MAX_SIG_DISTANCE = 15 # lines from the end
  ## subject used by parse_header when the header has none.
  DEFAULT_SUBJECT = ""
  ## NOTE(review): not referenced by the code visible in this file;
  ## presumably used by callers elsewhere -- confirm.
  DEFAULT_SENDER = "(missing sender)"
  ## decode_header_field returns nil for string values of this size or
  ## larger.
  MAX_HEADER_VALUE_SIZE = 4096

  ## header-derived attributes; set by parse_header or load_from_index!.
  attr_reader :id, :date, :from, :subj, :refs, :replytos, :to,
              :cc, :bcc, :labels, :attachments, :list_address, :recipient_email, :replyto,
              :list_subscribe, :list_unsubscribe

  ## NOTE(review): bool_reader is defined outside this file; presumably it
  ## generates "?"-suffixed predicate readers -- confirm.
  bool_reader :dirty, :source_marked_read, :snippet_contains_encrypted_content

  ## the Location objects at which this message is stored.
  attr_accessor :locations
     50 
     51   ## if you specify a :header, will use values from that. otherwise,
     52   ## will try and load the header from the source.
     53   def initialize opts
     54     @locations = opts[:locations] or raise ArgumentError, "locations can't be nil"
     55     @snippet = opts[:snippet]
     56     @snippet_contains_encrypted_content = false
     57     @have_snippet = !(opts[:snippet].nil? || opts[:snippet].empty?)
     58     @labels = Set.new(opts[:labels] || [])
     59     @dirty = false
     60     @encrypted = false
     61     @chunks = nil
     62     @attachments = []
     63 
     64     ## we need to initialize this. see comments in parse_header as to
     65     ## why.
     66     @refs = []
     67 
     68     #parse_header(opts[:header] || @source.load_header(@source_info))
     69   end
     70 
     71   def decode_header_field v
     72     return unless v
     73     return v unless v.is_a? String
     74     return unless v.size < MAX_HEADER_VALUE_SIZE # avoid regex blowup on spam
     75     ## Header values should be either 7-bit with RFC2047-encoded words
     76     ## or UTF-8 as per RFC6532. Replace any invalid high bytes with U+FFFD.
     77     Rfc2047.decode_to $encoding, v.dup.force_encoding(Encoding::UTF_8).scrub
     78   end
     79 
     80   def parse_header encoded_header
     81     header = SavingHash.new { |k| decode_header_field encoded_header[k] }
     82 
     83     @id = ''
     84     if header["message-id"]
     85       mid = header["message-id"] =~ /<(.+?)>/ ? $1 : header["message-id"]
     86       @id = sanitize_message_id mid
     87     end
     88     if (not @id.include? '@') || @id.length < 6
     89       @id = "sup-faked-" + Digest::MD5.hexdigest(raw_header)
     90       #from = header["from"]
     91       #debug "faking non-existent message-id for message from #{from}: #{id}"
     92     end
     93 
     94     @from = Person.from_address(if header["from"]
     95       header["from"]
     96     else
     97       name = "Sup Auto-generated Fake Sender <sup@fake.sender.example.com>"
     98       #debug "faking non-existent sender for message #@id: #{name}"
     99       name
    100     end)
    101 
    102     @date = case(date = header["date"])
    103     when Time
    104       date
    105     when String
    106       Time.rfc2822 date rescue nil
    107     end
    108     @date = location.fallback_date if @date.nil?
    109     @date = Time.utc 1970, 1, 1 if @date.nil?
    110 
    111     subj = header["subject"]
    112     subj = subj ? subj.fix_encoding! : nil
    113     @subj = subj ? subj.gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
    114     @to = Person.from_address_list header["to"]
    115     @cc = Person.from_address_list header["cc"]
    116     @bcc = Person.from_address_list header["bcc"]
    117 
    118     ## before loading our full header from the source, we can actually
    119     ## have some extra refs set by the UI. (this happens when the user
    120     ## joins threads manually). so we will merge the current refs values
    121     ## in here.
    122     refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }
    123     @refs = (@refs + refs).uniq
    124     @replytos = (header["in-reply-to"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }
    125 
    126     @replyto = Person.from_address header["reply-to"]
    127     @list_address = if header["list-post"]
    128       address = if header["list-post"] =~ /mailto:(.*?)[>\s$]/
    129         $1
    130       elsif header["list-post"] =~ /@/
    131         header["list-post"] # just try the whole fucking thing
    132       end
    133       address && Person.from_address(address)
    134     elsif header["mailing-list"]
    135       address = if header["mailing-list"] =~ /list (.*?);/
    136         $1
    137       end
    138       address && Person.from_address(address)
    139     elsif header["x-mailing-list"]
    140       Person.from_address header["x-mailing-list"]
    141     end
    142 
    143     @recipient_email = header["envelope-to"] || header["x-original-to"] || header["delivered-to"]
    144     @source_marked_read = header["status"] == "RO"
    145     @list_subscribe = header["list-subscribe"]
    146     @list_unsubscribe = header["list-unsubscribe"]
    147   end
    148 
    149   ## Expected index entry format:
    150   ## :message_id, :subject => String
    151   ## :date => Time
    152   ## :refs, :replytos => Array of String
    153   ## :from => Person
    154   ## :to, :cc, :bcc => Array of Person
    155   def load_from_index! entry
    156     @id = entry[:message_id]
    157     @from = entry[:from]
    158     @date = entry[:date]
    159     @subj = entry[:subject]
    160     @to = entry[:to]
    161     @cc = entry[:cc]
    162     @bcc = entry[:bcc]
    163     @refs = (@refs + entry[:refs]).uniq
    164     @replytos = entry[:replytos]
    165 
    166     @replyto = nil
    167     @list_address = nil
    168     @recipient_email = nil
    169     @source_marked_read = false
    170     @list_subscribe = nil
    171     @list_unsubscribe = nil
    172   end
    173 
    174   def add_ref ref
    175     @refs << ref
    176     @dirty = true
    177   end
    178 
    179   def remove_ref ref
    180     @dirty = true if @refs.delete ref
    181   end
    182 
    183   attr_reader :snippet
    184   def is_list_message?; !@list_address.nil?; end
    185   def is_draft?; @labels.member? :draft; end
    186   def draft_filename
    187     raise "not a draft" unless is_draft?
    188     source.fn_for_offset source_info
    189   end
    190 
    191   ## sanitize message ids by removing spaces and non-ascii characters.
    192   ## also, truncate to 255 characters. all these steps are necessary
    193   ## to make the index happy. of course, we probably fuck up a couple
    194   ## valid message ids as well. as long as we're consistent, this
    195   ## should be fine, though.
    196   ##
    197   ## also, mostly the message ids that are changed by this belong to
    198   ## spam email.
    199   ##
    200   ## an alternative would be to SHA1 or MD5 all message ids on a regular basis.
    201   ## don't tempt me.
    202   def sanitize_message_id mid; mid.gsub(/(\s|[^\000-\177])+/, "")[0..254] end
    203 
    204   def clear_dirty
    205     @dirty = false
    206   end
    207 
    208   def has_label? t; @labels.member? t; end
    209   def add_label l
    210     l = l.to_sym
    211     return if @labels.member? l
    212     @labels << l
    213     @dirty = true
    214   end
    215   def remove_label l
    216     l = l.to_sym
    217     return unless @labels.member? l
    218     @labels.delete l
    219     @dirty = true
    220   end
    221 
    222   def recipients
    223     @to + @cc + @bcc
    224   end
    225 
    226   def labels= l
    227     raise ArgumentError, "not a set" unless l.is_a?(Set)
    228     raise ArgumentError, "not a set of labels" unless l.all? { |ll| ll.is_a?(Symbol) }
    229     return if @labels == l
    230     @labels = l
    231     @dirty = true
    232   end
    233 
    234   def chunks
    235     load_from_source!
    236     @chunks
    237   end
    238 
    239   def location
    240     @locations.find { |x| x.valid? } || raise(OutOfSyncSourceError.new)
    241   end
    242 
    243   def source
    244     location.source
    245   end
    246 
    247   def source_info
    248     location.info
    249   end
    250 
    251   ## this is called when the message body needs to actually be loaded.
    252   def load_from_source!
    253     @chunks ||=
    254       begin
    255         ## we need to re-read the header because it contains information
    256         ## that we don't store in the index. actually i think it's just
    257         ## the mailing list address (if any), so this is kinda overkill.
    258         ## i could just store that in the index, but i think there might
    259         ## be other things like that in the future, and i'd rather not
    260         ## bloat the index.
    261         ## actually, it's also the differentiation between to/cc/bcc,
    262         ## so i will keep this.
    263         rmsg = location.parsed_message
    264         parse_header rmsg.header
    265         message_to_chunks rmsg
    266       rescue SourceError, SocketError, RMail::EncodingUnsupportedError => e
    267         warn_with_location "problem reading message #{id}"
    268         debug "could not load message, exception: #{e.inspect}"
    269 
    270         [Chunk::Text.new(error_message.split("\n"))]
    271 
    272       rescue Exception => e
    273 
    274         warn_with_location "problem reading message #{id}"
    275         debug "could not load message: #{location.inspect}, exception: #{e.inspect}"
    276 
    277         raise e
    278 
    279       end
    280   end
    281 
    282   def reload_from_source!
    283     @chunks = nil
    284     load_from_source!
    285   end
    286 
    287 
    288   def error_message
    289     <<EOS
    290 #@snippet...
    291 
    292 ***********************************************************************
    293  An error occurred while loading this message.
    294 ***********************************************************************
    295 EOS
    296   end
    297 
  ## the raw header, as returned by the first valid location.
  def raw_header
    location.raw_header
  end

  ## the raw message, as returned by the first valid location.
  def raw_message
    location.raw_message
  end

  ## passes block b on to the first valid location's
  ## each_raw_message_line.
  def each_raw_message_line &b
    location.each_raw_message_line(&b)
  end
    309 
    310   def sync_back
    311     @locations.map { |l| l.sync_back @labels, self }.any? do
    312       UpdateManager.relay self, :updated, self
    313     end
    314   end
    315 
    316   def merge_labels_from_locations merge_labels
    317     ## Get all labels from all locations
    318     location_labels = Set.new([])
    319 
    320     @locations.each do |l|
    321       if l.valid?
    322         location_labels = location_labels.union(l.labels?)
    323       end
    324     end
    325 
    326     ## Add to the message labels the intersection between all location
    327     ## labels and those we want to merge
    328     location_labels = location_labels.intersection(merge_labels.to_set)
    329 
    330     if not location_labels.empty?
    331       @labels = @labels.union(location_labels)
    332       @dirty = true
    333     end
    334   end
    335 
    336   ## returns all the content from a message that will be indexed
    337   def indexable_content
    338     load_from_source!
    339     [
    340       from && from.indexable_content,
    341       to.map { |p| p.indexable_content },
    342       cc.map { |p| p.indexable_content },
    343       bcc.map { |p| p.indexable_content },
    344       indexable_chunks.map { |c| c.lines.map { |l| l.fix_encoding! } },
    345       indexable_subject,
    346     ].flatten.compact.join " "
    347   end
    348 
    349   def indexable_body
    350     indexable_chunks.map { |c| c.lines }.flatten.compact.map { |l| l.fix_encoding! }.join " "
    351   end
    352 
    353   def indexable_chunks
    354     chunks.select { |c| c.indexable? } || []
    355   end
    356 
    357   def indexable_subject
    358     Message.normalize_subj(subj)
    359   end
    360 
    361   def quotable_body_lines
    362     chunks.find_all { |c| c.quotable? }.map { |c| c.lines }.flatten
    363   end
    364 
    365   def quotable_header_lines
    366     ["From: #{@from.full_address}"] +
    367       (@to.empty? ? [] : ["To: " + @to.map { |p| p.full_address }.join(", ")]) +
    368       (@cc.empty? ? [] : ["Cc: " + @cc.map { |p| p.full_address }.join(", ")]) +
    369       (@bcc.empty? ? [] : ["Bcc: " + @bcc.map { |p| p.full_address }.join(", ")]) +
    370       ["Date: #{@date.rfc822}",
    371        "Subject: #{@subj}"]
    372   end
    373 
    374   def self.build_from_source source, source_info
    375     m = Message.new :locations => [Location.new(source, source_info)]
    376     m.load_from_source!
    377     m
    378   end
    379 
    380 private
    381 
    382   ## here's where we handle decoding mime attachments. unfortunately
    383   ## but unsurprisingly, the world of mime attachments is a bit of a
    384   ## mess. as an empiricist, i'm basing the following behavior on
    385   ## observed mail rather than on interpretations of rfcs, so probably
    386   ## this will have to be tweaked.
    387   ##
    388   ## the general behavior i want is: ignore content-disposition, at
    389   ## least in so far as it suggests something being inline vs being an
    390   ## attachment. (because really, that should be the recipient's
    391   ## decision to make.) if a mime part is text/plain, OR if the user
    392   ## decoding hook converts it, then decode it and display it
    393   ## inline. for these decoded attachments, if it has associated
    394   ## filename, then make it collapsable and individually saveable;
    395   ## otherwise, treat it as regular body text.
    396   ##
    397   ## everything else is just an attachment and is not displayed
    398   ## inline.
    399   ##
    400   ## so, in contrast to mutt, the user is not exposed to the workings
    401   ## of the gruesome slaughterhouse and sausage factory that is a
    402   ## mime-encoded message, but need only see the delicious end
    403   ## product.
    404 
    405   def multipart_signed_to_chunks m
    406     if m.body.size != 2
    407       warn_with_location "multipart/signed with #{m.body.size} parts (expecting 2)"
    408       return
    409     end
    410 
    411     payload, signature = m.body
    412     if signature.multipart?
    413       warn_with_location "multipart/signed with payload multipart #{payload.multipart?} and signature multipart #{signature.multipart?}"
    414       return
    415     end
    416 
    417     ## this probably will never happen
    418     if payload.header.content_type && payload.header.content_type.downcase == "application/pgp-signature"
    419       warn_with_location "multipart/signed with payload content type #{payload.header.content_type}"
    420       return
    421     end
    422 
    423     if signature.header.content_type && signature.header.content_type.downcase != "application/pgp-signature"
    424       ## unknown signature type; just ignore.
    425       #warn "multipart/signed with signature content type #{signature.header.content_type}"
    426       return
    427     end
    428 
    429     [CryptoManager.verify(payload, signature), message_to_chunks(payload)].flatten.compact
    430   end
    431 
    432   def multipart_encrypted_to_chunks m
    433     if m.body.size != 2
    434       warn_with_location "multipart/encrypted with #{m.body.size} parts (expecting 2)"
    435       return
    436     end
    437 
    438     control, payload = m.body
    439     if control.multipart?
    440       warn_with_location "multipart/encrypted with control multipart #{control.multipart?} and payload multipart #{payload.multipart?}"
    441       return
    442     end
    443 
    444     if payload.header.content_type && payload.header.content_type.downcase != "application/octet-stream"
    445       warn_with_location "multipart/encrypted with payload content type #{payload.header.content_type}"
    446       return
    447     end
    448 
    449     if control.header.content_type && control.header.content_type.downcase != "application/pgp-encrypted"
    450       warn_with_location "multipart/encrypted with control content type #{signature.header.content_type}"
    451       return
    452     end
    453 
    454     notice, sig, decryptedm = CryptoManager.decrypt payload
    455     if decryptedm # managed to decrypt
    456       children = message_to_chunks(decryptedm, true)
    457       [notice, sig].compact + children
    458     else
    459       [notice]
    460     end
    461   end
    462 
    463   def has_embedded_message? m
    464     return false unless m.header.content_type
    465     %w(message/rfc822 message/global).include? m.header.content_type.downcase
    466   end
    467 
  ## takes a RMail::Message, breaks it into Chunk:: classes.
  ##
  ## m is the message (or message part) to convert. encrypted is handed
  ## down unchanged to recursive calls and to text_to_chunks.
  ## sibling_types is handed to Chunk::Attachment.new; when m is
  ## multipart it is recomputed as the content types of m's own parts
  ## before recursing into them.
  ##
  ## four cases are handled, in this order: multipart messages, embedded
  ## messages (see has_embedded_message?), application/pgp bodies, and
  ## everything else (an attachment when a filename can be found or
  ## faked, body text otherwise).
  ##
  ## raises RMail::EncodingUnsupportedError for an embedded message with
  ## a Content-Transfer-Encoding other than base64, quoted-printable,
  ## 7bit, 8bit or none.
  def message_to_chunks m, encrypted=false, sibling_types=[]
    if m.multipart?
      ## signed and encrypted multiparts get special handling; any other
      ## multipart type leaves chunks nil here.
      chunks =
        case m.header.content_type.downcase
        when "multipart/signed"
          multipart_signed_to_chunks m
        when "multipart/encrypted"
          multipart_encrypted_to_chunks m
        end

      ## no special handling, or the special handler gave up (returned
      ## nil): convert each part on its own.
      unless chunks
        sibling_types = m.body.map { |p| p.header.content_type }
        chunks = m.body.map { |p| message_to_chunks p, encrypted, sibling_types }.flatten.compact
      end

      chunks
    elsif has_embedded_message? m
      encoding = m.header["Content-Transfer-Encoding"]
      if m.body
        ## undo the transfer encoding by hand; the comparison against the
        ## header value is exact (case-sensitive).
        body =
        case encoding
        when "base64"
          m.body.unpack("m")[0]
        when "quoted-printable"
          m.body.unpack("M")[0]
        when "7bit", "8bit", nil
          m.body
        else
          raise RMail::EncodingUnsupportedError, encoding.inspect
        end
        body = body.normalize_whitespace
        payload = RMail::Parser.read(body)
        ## summarize the enclosed message's own header for the
        ## EnclosedMessage chunk.
        from = payload.header.from.first ? payload.header.from.first.format : ""
        to = payload.header.to.map { |p| p.format }.join(", ")
        cc = payload.header.cc.map { |p| p.format }.join(", ")
        subj = decode_header_field(payload.header.subject) || DEFAULT_SUBJECT
        subj = Message.normalize_subj(subj.gsub(/\s+/, " ").gsub(/\s+$/, ""))
        msgdate = payload.header.date
        ## NOTE(review): from, to and cc are always strings at this point,
        ## so the nil branches below are never taken.
        from_person = from ? Person.from_address(decode_header_field(from)) : nil
        to_people = to ? Person.from_address_list(decode_header_field(to)) : nil
        cc_people = cc ? Person.from_address_list(decode_header_field(cc)) : nil
        [Chunk::EnclosedMessage.new(from_person, to_people, cc_people, msgdate, subj)] + message_to_chunks(payload, encrypted)
      else
        debug "no body for message/rfc822 enclosure; skipping"
        []
      end
    elsif m.header.content_type && m.header.content_type.downcase == "application/pgp" && m.body
      ## apparently some versions of Thunderbird generate encrypted email that
      ## does not follow RFC3156, e.g. messages with X-Enigmail-Version: 0.95.0
      ## they have no MIME multipart and just set the body content type to
      ## application/pgp. this handles that.
      ##
      ## TODO 1: unduplicate code between here and
      ##         multipart_encrypted_to_chunks
      ## TODO 2: this only tries to decrypt. it cannot handle inline PGP
      notice, sig, decryptedm = CryptoManager.decrypt m.body
      if decryptedm # managed to decrypt
        children = message_to_chunks decryptedm, true
        [notice, sig].compact + children
      else
        ## try inline pgp signed
        chunks = inline_gpg_to_chunks m.body, $encoding, (m.charset || $encoding)
        if chunks
          chunks
        else
          [notice]
        end
      end
    else
      filename =
        ## first, paw through the headers looking for a filename.
        ## RFC 2183 (Content-Disposition) specifies that disposition-parms are
        ## separated by ";". So, we match everything up to " and ; (if present).
        if m.header["Content-Disposition"] && m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|\z)/m
          $1
        elsif m.header["Content-Type"] && m.header["Content-Type"] =~ /name="?(.*?[^\\])("|;|\z)/im
          $1

        ## haven't found one, but it's a non-text message. fake
        ## it.
        ##
        ## TODO: make this less lame.
        elsif m.header["Content-Type"] && m.header["Content-Type"] !~ /^text\/plain/i
          ## extension stays nil for anything but html and images.
          extension =
            case m.header["Content-Type"]
            when /text\/html/ then "html"
            when /image\/(.*)/ then $1
            end

          ["sup-attachment-#{Time.now.to_i}-#{rand 10000}", extension].join(".")
        end

      ## if there's a filename, we'll treat it as an attachment.
      if filename
        ## filename could be 2047 encoded
        filename = Rfc2047.decode_to $encoding, filename
        # add this to the attachments list if its not a generated html
        # attachment (should we allow images with generated names?).
        # Lowercase the filename because searches are easier that way
        @attachments.push filename.downcase unless filename =~ /^sup-attachment-/
        add_label :attachment unless filename =~ /^sup-attachment-/
        content_type = (m.header.content_type || "application/unknown").downcase # sometimes RubyMail gives us nil
        [Chunk::Attachment.new(content_type, filename, m, sibling_types)]

      ## otherwise, it's body text
      else
        ## Decode the body, charset conversion will follow either in
        ## inline_gpg_to_chunks (for inline GPG signed messages) or
        ## a few lines below (messages without inline GPG)
        body = m.body ? m.decode : ""

        ## Check for inline-PGP
        chunks = inline_gpg_to_chunks body, $encoding, (m.charset || $encoding)
        return chunks if chunks

        if m.body
          ## if there's no charset, use the current encoding as the charset.
          ## this ensures that the body is normalized to avoid non-displayable
          ## characters
          ## NOTE(review): m.charset is passed as-is here, without the
          ## "|| $encoding" fallback used above -- presumably transcode
          ## copes with a nil charset; confirm.
          body = m.decode.transcode($encoding, m.charset)
        else
          body = ""
        end

        text_to_chunks(body.normalize_whitespace.split("\n"), encrypted)
      end
    end
  end
    597 
    598   ## looks for gpg signed (but not encrypted) inline  messages inside the
    599   ## message body (there is no extra header for inline GPG) or for encrypted
    600   ## (and possible signed) inline GPG messages
    601   def inline_gpg_to_chunks body, encoding_to, encoding_from
    602     lines = body.split("\n")
    603 
    604     # First case: Message is enclosed between
    605     #
    606     # -----BEGIN PGP SIGNED MESSAGE-----
    607     # and
    608     # -----END PGP SIGNED MESSAGE-----
    609     #
    610     # In some cases, END PGP SIGNED MESSAGE doesn't appear
    611     # (and may leave strange -----BEGIN PGP SIGNATURE----- ?)
    612     gpg = lines.between(GPG_SIGNED_START, GPG_SIGNED_END)
    613     # between does not check if GPG_END actually exists
    614     # Reference: http://permalink.gmane.org/gmane.mail.sup.devel/641
    615     if !gpg.empty?
    616       msg = RMail::Message.new
    617       msg.body = gpg.join("\n")
    618 
    619       body = body.transcode(encoding_to, encoding_from)
    620       lines = body.split("\n")
    621       sig = lines.between(GPG_SIGNED_START, GPG_SIG_START)
    622       startidx = lines.index(GPG_SIGNED_START)
    623       endidx = lines.index(GPG_SIG_END)
    624       before = startidx != 0 ? lines[0 .. startidx-1] : []
    625       after = endidx ? lines[endidx+1 .. lines.size] : []
    626 
    627       # sig contains BEGIN PGP SIGNED MESSAGE and END PGP SIGNATURE, so
    628       # we ditch them. sig may also contain the hash used by PGP (with a
    629       # newline), so we also skip them
    630       sig_start = sig[1].match(/^Hash:/) ? 3 : 1
    631       sig_end = sig.size-2
    632       payload = RMail::Message.new
    633       payload.body = sig[sig_start, sig_end].join("\n")
    634       return [text_to_chunks(before, false),
    635               CryptoManager.verify(nil, msg, false),
    636               message_to_chunks(payload),
    637               text_to_chunks(after, false)].flatten.compact
    638     end
    639 
    640     # Second case: Message is encrypted
    641 
    642     gpg = lines.between(GPG_START, GPG_END)
    643     # between does not check if GPG_END actually exists
    644     if !gpg.empty? && !lines.index(GPG_END).nil?
    645       msg = RMail::Message.new
    646       msg.body = gpg.join("\n")
    647 
    648       startidx = lines.index(GPG_START)
    649       before = startidx != 0 ? lines[0 .. startidx-1] : []
    650       after = lines[lines.index(GPG_END)+1 .. lines.size]
    651 
    652       notice, sig, decryptedm = CryptoManager.decrypt msg, true
    653       chunks = if decryptedm # managed to decrypt
    654         children = message_to_chunks(decryptedm, true)
    655         [notice, sig].compact + children
    656       else
    657         [notice]
    658       end
    659       return [text_to_chunks(before, false),
    660               chunks,
    661               text_to_chunks(after, false)].flatten.compact
    662     end
    663   end
    664 
  ## parse the lines of text into chunk objects.  the heuristics here
  ## need tweaking in some nice manner. TODO: move these heuristics
  ## into the classes themselves.
  ##
  ## lines is an array of body lines. encrypted says whether they came
  ## out of an encrypted message; it only affects snippet bookkeeping.
  ##
  ## returns an array of Chunk::Text, Chunk::Quote and Chunk::Signature
  ## objects. as a side effect, and only when the message was created
  ## without a snippet, text lines are collected into @snippet.
  def text_to_chunks lines, encrypted
    state = :text # one of :text, :quote, or :sig
    chunks = []
    chunk_lines = []
    nextline_index = -1

    lines.each_with_index do |line, i|
      if i >= nextline_index
        # look for next nonblank line only when needed to avoid O(n²)
        # behavior on sequences of blank lines
        if nextline_index = lines[(i+1)..-1].index { |l| l !~ /^\s*$/ } # skip blank lines
          ## index was relative to the slice; make it absolute
          nextline_index += i + 1
          nextline = lines[nextline_index]
        else
          nextline_index = lines.length
          nextline = nil
        end
      end

      case state
      when :text
        newstate = nil

        ## the following /:$/ followed by /\w/ is an attempt to detect the
        ## start of a quote. this is split into two regexen because the
        ## original regex /\w.*:$/ had very poor behavior on long lines
        ## like ":a:a:a:a:a" that occurred in certain emails.
        if line =~ QUOTE_PATTERN || (line =~ /:$/ && line =~ /\w/ && nextline =~ QUOTE_PATTERN)
          newstate = :quote
        ## a signature must start fewer than MAX_SIG_DISTANCE lines from
        ## the end, and no "-- " line may follow it.
        elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE && !lines[(i+1)..-1].index { |l| l =~ /^-- $/ }
          newstate = :sig
        elsif line =~ BLOCK_QUOTE_PATTERN && nextline !~ QUOTE_PATTERN
          newstate = :block_quote
        end

        if newstate
          ## close the running text chunk; the current line opens the
          ## next one
          chunks << Chunk::Text.new(chunk_lines) unless chunk_lines.empty?
          chunk_lines = [line]
          state = newstate
        else
          chunk_lines << line
        end

      when :quote
        newstate = nil

        ## a blank line stays in the quote only when the next nonblank
        ## line is quoted as well
        if line =~ QUOTE_PATTERN || (line =~ /^\s*$/ && nextline =~ QUOTE_PATTERN)
          chunk_lines << line
        elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
          newstate = :sig
        else
          newstate = :text
        end

        if newstate
          if chunk_lines.empty?
            # nothing
          else
            chunks << Chunk::Quote.new(chunk_lines)
          end
          chunk_lines = [line]
          state = newstate
        end

      ## once in a block quote or a signature, everything up to the end
      ## of the text belongs to it
      when :block_quote, :sig
        chunk_lines << line
      end

      ## grow the snippet from text lines that are neither blank nor
      ## contain a run of three or more "=", "*", "#", "_" or "-"
      ## characters, until it reaches SNIPPET_LEN
      if !@have_snippet && state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/
        @snippet ||= ""
        @snippet += " " unless @snippet.empty?
        @snippet += line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ")
        oldlen = @snippet.length
        @snippet = @snippet[0 ... SNIPPET_LEN].chomp
        @snippet += "..." if @snippet.length < oldlen
        @dirty = true unless encrypted && $config[:discard_snippets_from_encrypted_messages]
        @snippet_contains_encrypted_content = true if encrypted
      end
    end

    ## final object
    case state
    when :quote, :block_quote
      chunks << Chunk::Quote.new(chunk_lines) unless chunk_lines.empty?
    when :text
      chunks << Chunk::Text.new(chunk_lines) unless chunk_lines.empty?
    when :sig
      chunks << Chunk::Signature.new(chunk_lines) unless chunk_lines.empty?
    end
    chunks
  end
    759 
    760   def warn_with_location msg
    761     warn msg
    762     warn "Message is in #{@locations}"
    763   end
    764 end
    765 
    766 class Location
    767   attr_reader :source
    768   attr_reader :info
    769 
    770   def initialize source, info
    771     @source = source
    772     @info = info
    773   end
    774 
    775   def raw_header
    776     source.raw_header info
    777   end
    778 
    779   def raw_message
    780     source.raw_message info
    781   end
    782 
    783   def sync_back labels, message
    784     synced = false
    785     return synced unless sync_back_enabled? and valid?
    786     source.synchronize do
    787       new_info = source.sync_back(@info, labels)
    788       if new_info
    789         @info = new_info
    790         Index.sync_message message, true
    791         synced = true
    792       end
    793     end
    794     synced
    795   end
    796 
    797   def sync_back_enabled?
    798     source.respond_to? :sync_back and $config[:sync_back_to_maildir] and source.sync_back_enabled?
    799   end
    800 
    801   ## much faster than raw_message
    802   def each_raw_message_line &b
    803     source.each_raw_message_line info, &b
    804   end
    805 
    806   def parsed_message
    807     source.load_message info
    808   end
    809 
    810   def fallback_date
    811     source.fallback_date_for_message info
    812   end
    813 
    814   def valid?
    815     source.valid? info
    816   end
    817 
    818   def labels?
    819     source.labels? info
    820   end
    821 
    822   def == o
    823     o.source.id == source.id and o.info == info
    824   end
    825 
    826   def hash
    827     [source.id, info].hash
    828   end
    829 end
    830 
    831 end