lib/sup/message.rb (27223B) - raw
1 # encoding: UTF-8
2
3 require 'time'
4 require 'string-scrub' if /^2\.0\./ =~ RUBY_VERSION
5
6 module Redwood
7
## a Message is what's threaded.
##
## it is also where the parsing for quotes and signatures is done, but
## that should be moved out to a separate class at some point, because
## i would like, for example, to be able to add in a ruby-talk
## specific module that would detect and link to /ruby-talk:\d+/
## sequences in the text of an email. (how sweet would that be?)
15
16 class Message
## upper bound on the length of the generated snippet text
SNIPPET_LEN = 80
## one or more leading "Re:", "Re[2]:" or "Re(2):" prefixes, any case
RE_PATTERN = /^((re|re[\[\(]\d[\]\)]):\s*)+/i

## some utility methods
class << self
  ## returns +s+ with every reply prefix matched by RE_PATTERN removed
  def normalize_subj(s)
    s.gsub(RE_PATTERN, "")
  end

  ## truthy (the match offset) when +s+ carries a reply prefix, nil otherwise
  def subj_is_reply?(s)
    s =~ RE_PATTERN
  end

  ## returns +s+ unchanged if it is already a reply subject, otherwise
  ## prepends "Re: "
  def reify_subj(s)
    return s if subj_is_reply?(s)
    "Re: " + s
  end
end
26
## --- body parsing heuristics -------------------------------------------
## a line that starts (after at most four blanks) with a quote marker
QUOTE_PATTERN       = /^\s{0,4}[>|\}]/
## the "-----Original Message-----" style separator
BLOCK_QUOTE_PATTERN = /^-----\s*Original Message\s*----+$/
## the various separators that introduce a signature
SIG_PATTERN         = /(^(- )*-- ?$)|(^\s*----------+\s*$)|(^\s*_________+\s*$)|(^\s*--~--~-)|(^\s*--\+\+\*\*==)/

## --- inline GPG armor delimiters ---------------------------------------
GPG_SIGNED_START = "-----BEGIN PGP SIGNED MESSAGE-----"
GPG_SIGNED_END   = "-----END PGP SIGNED MESSAGE-----"
GPG_START        = "-----BEGIN PGP MESSAGE-----"
GPG_END          = "-----END PGP MESSAGE-----"
GPG_SIG_START    = "-----BEGIN PGP SIGNATURE-----"
GPG_SIG_END      = "-----END PGP SIGNATURE-----"

## --- limits and defaults -----------------------------------------------
MAX_SIG_DISTANCE      = 15 # lines from the end
DEFAULT_SUBJECT       = ""
DEFAULT_SENDER        = "(missing sender)"
MAX_HEADER_VALUE_SIZE = 4096
42
## read-only message attributes
attr_reader :id, :date, :from, :subj, :refs, :replytos,
            :to, :cc, :bcc,
            :labels, :attachments,
            :list_address, :recipient_email, :replyto,
            :list_subscribe, :list_unsubscribe

## boolean flags; bool_reader is a macro defined elsewhere in the project
## (presumably it generates "?"-suffixed readers -- confirm)
bool_reader :dirty,
            :source_marked_read,
            :snippet_contains_encrypted_content

## the Location objects this message can be read from
attr_accessor :locations
50
## builds a message from an options hash:
##   :locations  (required) where the message can be read from
##   :snippet    (optional) an already-known snippet
##   :labels     (optional) enumerable of labels, copied into a Set
## raises ArgumentError when :locations is missing.
##
## the header is not parsed here; that happens in parse_header.
def initialize(opts)
  @locations = opts[:locations]
  raise ArgumentError, "locations can't be nil" unless @locations

  given_snippet = opts[:snippet]
  @snippet = given_snippet
  @have_snippet = !given_snippet.nil? && !given_snippet.empty?
  @snippet_contains_encrypted_content = false

  @labels = Set.new(opts[:labels] || [])
  @attachments = []
  @chunks = nil
  @encrypted = false
  @dirty = false

  ## we need to initialize this. see comments in parse_header as to
  ## why.
  @refs = []

  #parse_header(opts[:header] || @source.load_header(@source_info))
end
70
## decodes a single raw header value.
##
## returns nil for a nil/false value and for strings of
## MAX_HEADER_VALUE_SIZE characters or more (avoids regex blowup on
## spam); non-String values are handed back untouched.
def decode_header_field(v)
  return nil unless v
  return v unless String === v
  return nil if v.size >= MAX_HEADER_VALUE_SIZE

  ## Header values should be either 7-bit with RFC2047-encoded words
  ## or UTF-8 as per RFC6532. Replace any invalid high bytes with U+FFFD.
  utf8_value = v.dup.force_encoding(Encoding::UTF_8).scrub
  Rfc2047.decode_to($encoding, utf8_value)
end
79
## fills in every header-derived attribute of the message (@id, @from,
## @date, @subj, @to, @cc, @bcc, @refs, @replytos, @replyto,
## @list_address, @recipient_email, @source_marked_read,
## @list_subscribe, @list_unsubscribe).
##
## +encoded_header+ is indexed with lower-case header names; each value
## is passed through decode_header_field. SavingHash is defined
## elsewhere in the project (presumably it caches the block's result
## per key -- confirm).
def parse_header(encoded_header)
  header = SavingHash.new { |k| decode_header_field encoded_header[k] }

  @id = ''
  if header["message-id"]
    mid = header["message-id"] =~ /<(.+?)>/ ? $1 : header["message-id"]
    @id = sanitize_message_id mid
  end
  ## an id without "@" or shorter than 6 characters is replaced by a
  ## digest of the raw header
  if (not @id.include? '@') || @id.length < 6
    @id = "sup-faked-" + Digest::MD5.hexdigest(raw_header)
    #from = header["from"]
    #debug "faking non-existent message-id for message from #{from}: #{id}"
  end

  @from = Person.from_address(if header["from"]
    header["from"]
  else
    name = "Sup Auto-generated Fake Sender <sup@fake.sender.example.com>"
    #debug "faking non-existent sender for message #@id: #{name}"
    name
  end)

  ## date: take a Time as is, parse a String, otherwise fall back to the
  ## location's date and finally to the epoch
  @date = case(date = header["date"])
    when Time
      date
    when String
      Time.rfc2822 date rescue nil
    end
  @date = location.fallback_date if @date.nil?
  @date = Time.utc 1970, 1, 1 if @date.nil?

  subj = header["subject"]
  subj = subj ? subj.fix_encoding! : nil
  @subj = subj ? subj.gsub(/\s+/, " ").gsub(/\s+$/, "") : DEFAULT_SUBJECT
  @to = Person.from_address_list header["to"]
  @cc = Person.from_address_list header["cc"]
  @bcc = Person.from_address_list header["bcc"]

  ## before loading our full header from the source, we can actually
  ## have some extra refs set by the UI. (this happens when the user
  ## joins threads manually). so we will merge the current refs values
  ## in here.
  refs = (header["references"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }
  @refs = (@refs + refs).uniq
  @replytos = (header["in-reply-to"] || "").scan(/<(.+?)>/).map { |x| sanitize_message_id x.first }

  @replyto = Person.from_address header["reply-to"]
  @list_address = if header["list-post"]
    ## the address ends at ">", at whitespace, or at the end of the
    ## value. (inside a character class "$" is a literal dollar sign,
    ## so the end of the value has to be matched as an alternative.)
    address = if header["list-post"] =~ /mailto:(.*?)(?:[>\s]|\z)/
      $1
    elsif header["list-post"] =~ /@/
      header["list-post"] # no mailto: at all; just try the whole value
    end
    address && Person.from_address(address)
  elsif header["mailing-list"]
    address = if header["mailing-list"] =~ /list (.*?);/
      $1
    end
    address && Person.from_address(address)
  elsif header["x-mailing-list"]
    Person.from_address header["x-mailing-list"]
  end

  @recipient_email = header["envelope-to"] || header["x-original-to"] || header["delivered-to"]
  @source_marked_read = header["status"] == "RO"
  @list_subscribe = header["list-subscribe"]
  @list_unsubscribe = header["list-unsubscribe"]
end
148
## populates the message from an index entry instead of the source.
##
## Expected index entry format:
##   :message_id, :subject => String
##   :date                 => Time
##   :refs, :replytos      => Array of String
##   :from                 => Person
##   :to, :cc, :bcc        => Array of Person
##
## refs already present on the message are kept and merged with the
## entry's refs. everything the index does not store is reset.
def load_from_index!(entry)
  @id, @from, @date, @subj = entry[:message_id], entry[:from], entry[:date], entry[:subject]
  @to, @cc, @bcc = entry[:to], entry[:cc], entry[:bcc]
  @refs = @refs | entry[:refs]
  @replytos = entry[:replytos]

  @source_marked_read = false
  @replyto = @list_address = @recipient_email = @list_subscribe = @list_unsubscribe = nil
end
173
## appends +ref+ to the references and marks the message dirty
def add_ref(ref)
  @refs.push(ref)
  @dirty = true
end
178
## removes +ref+ from the references; the message only becomes dirty
## when something was actually removed
def remove_ref(ref)
  removed = @refs.delete(ref)
  return unless removed
  @dirty = true
end
182
## short excerpt of the body text
attr_reader(:snippet)
## true when a mailing-list address is set for this message
def is_list_message?
  !@list_address.nil?
end
## true when the message carries the :draft label
def is_draft?
  @labels.member?(:draft)
end
## asks the source for the file name belonging to this draft.
## raises RuntimeError ("not a draft") for non-draft messages.
def draft_filename
  is_draft? or raise "not a draft"
  source.fn_for_offset(source_info)
end
190
## sanitize message ids by removing whitespace and non-ascii
## characters, then truncate to 255 characters. all these steps are
## necessary to keep the index happy. this probably mangles a few
## valid message ids as well, but as long as it is applied
## consistently that should be fine.
##
## the message ids changed by this mostly belong to spam email.
##
## an alternative would be to SHA1 or MD5 all message ids on a regular
## basis. don't tempt me.
def sanitize_message_id(mid)
  stripped = mid.gsub(/(\s|[^\000-\177])+/, "")
  stripped[0..254]
end
203
## resets the dirty flag
def clear_dirty; @dirty = false; end
207
## true when label +t+ is set on the message
def has_label?(t)
  @labels.member?(t)
end
## adds label +l+ (converted with to_sym); the message becomes dirty
## only when the label was not already present
def add_label(l)
  label = l.to_sym
  unless @labels.member?(label)
    @labels << label
    @dirty = true
  end
end
## removes label +l+ (converted with to_sym); the message becomes dirty
## only when the label was present
def remove_label(l)
  label = l.to_sym
  if @labels.member?(label)
    @labels.delete label
    @dirty = true
  end
end
221
## to, cc and bcc recipients, in that order, as one array
def recipients
  [@to, @cc, @bcc].inject(:+)
end
225
## replaces the whole label set. +l+ must be a Set containing only
## Symbols, otherwise ArgumentError is raised. assigning an equal set
## is a no-op and leaves the dirty flag alone.
def labels=(l)
  raise ArgumentError, "not a set" unless Set === l
  raise ArgumentError, "not a set of labels" if l.any? { |label| !label.is_a?(Symbol) }

  unless @labels == l
    @labels = l
    @dirty = true
  end
end
233
## the message body as chunk objects; triggers loading from the source
## first
def chunks
  load_from_source!
  return @chunks
end
238
## the first location that reports itself valid.
## raises OutOfSyncSourceError when none does.
def location
  usable = @locations.find { |loc| loc.valid? }
  raise(OutOfSyncSourceError.new) unless usable
  usable
end
242
## the source of the currently valid location
def source; location.source; end
246
## the source-specific info of the currently valid location
def source_info; location.info; end
250
## this is called when the message body needs to actually be loaded.
##
## the result is cached in @chunks; later calls return the cached
## value. for source, socket and unsupported-encoding errors the
## message gets a single text chunk holding error_message; any other
## exception is logged and re-raised.
def load_from_source!
  return @chunks if @chunks

  @chunks =
    begin
      ## we need to re-read the header because it contains information
      ## that we don't store in the index. actually i think it's just
      ## the mailing list address (if any), so this is kinda overkill.
      ## i could just store that in the index, but i think there might
      ## be other things like that in the future, and i'd rather not
      ## bloat the index.
      ## actually, it's also the differentiation between to/cc/bcc,
      ## so i will keep this.
      parsed = location.parsed_message
      parse_header parsed.header
      message_to_chunks parsed
    rescue SourceError, SocketError, RMail::EncodingUnsupportedError => err
      warn_with_location "problem reading message #{id}"
      debug "could not load message, exception: #{err.inspect}"

      [Chunk::Text.new(error_message.split("\n"))]
    rescue Exception => err
      warn_with_location "problem reading message #{id}"
      debug "could not load message: #{location.inspect}, exception: #{err.inspect}"

      raise
    end
end
281
## drops the cached chunks and loads the message from the source again
def reload_from_source!
  @chunks = nil
  return load_from_source!
end
286
287
## text shown in place of the body when the message could not be
## loaded: the snippet followed by a boxed error notice
def error_message
  <<EOS
#{@snippet}...

***********************************************************************
An error occurred while loading this message.
***********************************************************************
EOS
end
297
## raw header, as provided by the currently valid location
def raw_header; location.raw_header; end
301
## raw message, as provided by the currently valid location
def raw_message; location.raw_message; end
305
## passes the given block on to the location's each_raw_message_line
def each_raw_message_line(&blk)
  location.each_raw_message_line(&blk)
end
309
## writes the labels back to every location and, when at least one
## location reports that it synced, relays an :updated event.
##
## returns true when any location synced, false otherwise.
def sync_back
  ## map first so that every location gets its chance to sync; the
  ## results must be inspected with a block-less any?. (a block given
  ## to any? replaces the test itself, which relayed the update no
  ## matter whether anything had been synced.)
  synced = @locations.map { |l| l.sync_back @labels, self }.any?
  UpdateManager.relay self, :updated, self if synced
  synced
end
315
## collects the labels of all valid locations and adds to the message
## those that also appear in +merge_labels+. the message is marked
## dirty when at least one such label exists.
def merge_labels_from_locations(merge_labels)
  ## Get all labels from all locations
  gathered = @locations.inject(Set.new) do |acc, loc|
    loc.valid? ? acc.union(loc.labels?) : acc
  end

  ## Keep only the labels we were asked to merge
  wanted = gathered.intersection(merge_labels.to_set)

  unless wanted.empty?
    @labels = @labels.union(wanted)
    @dirty = true
  end
end
335
## returns all the content from a message that will be indexed, as one
## space-separated string: sender, recipients, indexable body lines
## and the normalized subject
def indexable_content
  load_from_source!

  pieces = []
  pieces << (from && from.indexable_content)
  pieces << to.map { |person| person.indexable_content }
  pieces << cc.map { |person| person.indexable_content }
  pieces << bcc.map { |person| person.indexable_content }
  pieces << indexable_chunks.map { |chunk| chunk.lines.map { |text| text.fix_encoding! } }
  pieces << indexable_subject

  pieces.flatten.compact.join(" ")
end
348
## the lines of all indexable chunks, joined by single spaces
def indexable_body
  body_lines = indexable_chunks.map { |chunk| chunk.lines }.flatten.compact
  fixed = body_lines.map { |text| text.fix_encoding! }
  fixed.join(" ")
end
352
## the chunks that declare themselves indexable
def indexable_chunks
  selected = chunks.select { |chunk| chunk.indexable? }
  selected || []
end
356
## the subject with its reply prefixes stripped
def indexable_subject; Message.normalize_subj(subj); end
360
## the lines of all chunks that declare themselves quotable
def quotable_body_lines
  quotable = chunks.select { |chunk| chunk.quotable? }
  quotable.map { |chunk| chunk.lines }.flatten
end
364
## header lines used when the message is quoted: From, then To/Cc/Bcc
## (each only when non-empty), then Date and Subject
def quotable_header_lines
  address_list = lambda { |people| people.map { |person| person.full_address }.join(", ") }

  header_lines = ["From: #{@from.full_address}"]
  header_lines << ("To: " + address_list.call(@to)) unless @to.empty?
  header_lines << ("Cc: " + address_list.call(@cc)) unless @cc.empty?
  header_lines << ("Bcc: " + address_list.call(@bcc)) unless @bcc.empty?
  header_lines << "Date: #{@date.rfc822}"
  header_lines << "Subject: #{@subj}"
  header_lines
end
373
## creates a message with a single location (+source+, +source_info+)
## and loads it from the source right away
def self.build_from_source(source, source_info)
  where = Location.new(source, source_info)
  Message.new(:locations => [where]).tap { |m| m.load_from_source! }
end
379
380 private
381
382 ## here's where we handle decoding mime attachments. unfortunately
383 ## but unsurprisingly, the world of mime attachments is a bit of a
384 ## mess. as an empiricist, i'm basing the following behavior on
385 ## observed mail rather than on interpretations of rfcs, so probably
386 ## this will have to be tweaked.
387 ##
388 ## the general behavior i want is: ignore content-disposition, at
389 ## least in so far as it suggests something being inline vs being an
390 ## attachment. (because really, that should be the recipient's
391 ## decision to make.) if a mime part is text/plain, OR if the user
392 ## decoding hook converts it, then decode it and display it
393 ## inline. for these decoded attachments, if it has associated
394 ## filename, then make it collapsable and individually saveable;
395 ## otherwise, treat it as regular body text.
396 ##
397 ## everything else is just an attachment and is not displayed
398 ## inline.
399 ##
400 ## so, in contrast to mutt, the user is not exposed to the workings
401 ## of the gruesome slaughterhouse and sausage factory that is a
402 ## mime-encoded message, but need only see the delicious end
403 ## product.
404
## turns a multipart/signed message into chunks: the verification
## result followed by the chunks of the signed payload.
##
## returns nil (after a warning where appropriate) when the message
## does not have exactly two parts, when the signature part is itself
## multipart, when the payload claims to be a pgp signature, or when
## the signature is not application/pgp-signature.
def multipart_signed_to_chunks(m)
  unless m.body.size == 2
    warn_with_location "multipart/signed with #{m.body.size} parts (expecting 2)"
    return
  end

  payload, signature = m.body
  if signature.multipart?
    warn_with_location "multipart/signed with payload multipart #{payload.multipart?} and signature multipart #{signature.multipart?}"
    return
  end

  ## this probably will never happen
  payload_type = payload.header.content_type
  if payload_type && payload_type.downcase == "application/pgp-signature"
    warn_with_location "multipart/signed with payload content type #{payload.header.content_type}"
    return
  end

  ## unknown signature type; just ignore.
  signature_type = signature.header.content_type
  return if signature_type && signature_type.downcase != "application/pgp-signature"

  verified = CryptoManager.verify(payload, signature)
  [verified, message_to_chunks(payload)].flatten.compact
end
431
## turns a multipart/encrypted message into chunks.
##
## the message must consist of a non-multipart control part
## (application/pgp-encrypted) followed by the payload
## (application/octet-stream); otherwise a warning is logged and nil
## is returned.
##
## on successful decryption the result is the crypto notice, the
## signature notice (if any) and the chunks of the decrypted message;
## otherwise just the notice.
def multipart_encrypted_to_chunks m
  if m.body.size != 2
    warn_with_location "multipart/encrypted with #{m.body.size} parts (expecting 2)"
    return
  end

  control, payload = m.body
  if control.multipart?
    warn_with_location "multipart/encrypted with control multipart #{control.multipart?} and payload multipart #{payload.multipart?}"
    return
  end

  if payload.header.content_type && payload.header.content_type.downcase != "application/octet-stream"
    warn_with_location "multipart/encrypted with payload content type #{payload.header.content_type}"
    return
  end

  if control.header.content_type && control.header.content_type.downcase != "application/pgp-encrypted"
    ## report the control part's type; there is no "signature" part in
    ## an encrypted message
    warn_with_location "multipart/encrypted with control content type #{control.header.content_type}"
    return
  end

  notice, sig, decryptedm = CryptoManager.decrypt payload
  if decryptedm # managed to decrypt
    children = message_to_chunks(decryptedm, true)
    [notice, sig].compact + children
  else
    [notice]
  end
end
462
## true when the content type of +m+ is message/rfc822 or
## message/global (compared case-insensitively); false when the
## content type is missing
def has_embedded_message?(m)
  content_type = m.header.content_type
  return false unless content_type
  %w(message/rfc822 message/global).include?(content_type.downcase)
end
467
## takes a RMail::Message, breaks it into Chunk:: classes.
##
## m             the message (or message part) to convert
## encrypted     whether the part came out of a decrypted message
## sibling_types content types of the parts next to +m+; handed to
##               attachment chunks
##
## four cases are handled: multipart containers, embedded messages,
## bodies of type application/pgp, and everything else (attachments
## and plain body text).
def message_to_chunks m, encrypted=false, sibling_types=[]
  if m.multipart?
    chunks =
      case m.header.content_type.downcase
      when "multipart/signed"
        multipart_signed_to_chunks m
      when "multipart/encrypted"
        multipart_encrypted_to_chunks m
      end

    ## any other multipart type, or a signed/encrypted one that could
    ## not be handled: convert the parts one by one
    unless chunks
      sibling_types = m.body.map { |p| p.header.content_type }
      chunks = m.body.map { |p| message_to_chunks p, encrypted, sibling_types }.flatten.compact
    end

    chunks
  elsif has_embedded_message? m
    encoding = m.header["Content-Transfer-Encoding"]
    if m.body
      body =
        ## transfer encoding names are case-insensitive (RFC 2045), so
        ## normalize before comparing; "binary" is an identity encoding
        ## just like 7bit and 8bit
        case encoding && encoding.strip.downcase
        when "base64"
          m.body.unpack("m")[0]
        when "quoted-printable"
          m.body.unpack("M")[0]
        when "7bit", "8bit", "binary", nil
          m.body
        else
          raise RMail::EncodingUnsupportedError, encoding.inspect
        end
      body = body.normalize_whitespace
      payload = RMail::Parser.read(body)
      from = payload.header.from.first ? payload.header.from.first.format : ""
      to = payload.header.to.map { |p| p.format }.join(", ")
      cc = payload.header.cc.map { |p| p.format }.join(", ")
      subj = decode_header_field(payload.header.subject) || DEFAULT_SUBJECT
      subj = Message.normalize_subj(subj.gsub(/\s+/, " ").gsub(/\s+$/, ""))
      msgdate = payload.header.date
      from_person = from ? Person.from_address(decode_header_field(from)) : nil
      to_people = to ? Person.from_address_list(decode_header_field(to)) : nil
      cc_people = cc ? Person.from_address_list(decode_header_field(cc)) : nil
      [Chunk::EnclosedMessage.new(from_person, to_people, cc_people, msgdate, subj)] + message_to_chunks(payload, encrypted)
    else
      debug "no body for message/rfc822 enclosure; skipping"
      []
    end
  elsif m.header.content_type && m.header.content_type.downcase == "application/pgp" && m.body
    ## apparently some versions of Thunderbird generate encrypted email that
    ## does not follow RFC3156, e.g. messages with X-Enigmail-Version: 0.95.0
    ## they have no MIME multipart and just set the body content type to
    ## application/pgp. this handles that.
    ##
    ## TODO 1: unduplicate code between here and
    ##         multipart_encrypted_to_chunks
    ## TODO 2: this only tries to decrypt. it cannot handle inline PGP
    notice, sig, decryptedm = CryptoManager.decrypt m.body
    if decryptedm # managed to decrypt
      children = message_to_chunks decryptedm, true
      [notice, sig].compact + children
    else
      ## try inline pgp signed
      chunks = inline_gpg_to_chunks m.body, $encoding, (m.charset || $encoding)
      if chunks
        chunks
      else
        [notice]
      end
    end
  else
    filename =
      ## first, paw through the headers looking for a filename.
      ## RFC 2183 (Content-Disposition) specifies that disposition-parms are
      ## separated by ";". So, we match everything up to " and ; (if present).
      if m.header["Content-Disposition"] && m.header["Content-Disposition"] =~ /filename="?(.*?[^\\])("|;|\z)/m
        $1
      elsif m.header["Content-Type"] && m.header["Content-Type"] =~ /name="?(.*?[^\\])("|;|\z)/im
        $1

      ## haven't found one, but it's a non-text message. fake
      ## it.
      ##
      ## TODO: make this less lame.
      elsif m.header["Content-Type"] && m.header["Content-Type"] !~ /^text\/plain/i
        extension =
          case m.header["Content-Type"]
          when /text\/html/ then "html"
          when /image\/(.*)/ then $1
          end

        ["sup-attachment-#{Time.now.to_i}-#{rand 10000}", extension].join(".")
      end

    ## if there's a filename, we'll treat it as an attachment.
    if filename
      ## filename could be 2047 encoded
      filename = Rfc2047.decode_to $encoding, filename
      # add this to the attachments list if its not a generated html
      # attachment (should we allow images with generated names?).
      # Lowercase the filename because searches are easier that way
      @attachments.push filename.downcase unless filename =~ /^sup-attachment-/
      add_label :attachment unless filename =~ /^sup-attachment-/
      content_type = (m.header.content_type || "application/unknown").downcase # sometimes RubyMail gives us nil
      [Chunk::Attachment.new(content_type, filename, m, sibling_types)]

    ## otherwise, it's body text
    else
      ## Decode the body, charset conversion will follow either in
      ## inline_gpg_to_chunks (for inline GPG signed messages) or
      ## a few lines below (messages without inline GPG)
      body = m.body ? m.decode : ""

      ## Check for inline-PGP
      chunks = inline_gpg_to_chunks body, $encoding, (m.charset || $encoding)
      return chunks if chunks

      if m.body
        ## if there's no charset, use the current encoding as the charset.
        ## this ensures that the body is normalized to avoid non-displayable
        ## characters
        body = m.decode.transcode($encoding, m.charset)
      else
        body = ""
      end

      text_to_chunks(body.normalize_whitespace.split("\n"), encrypted)
    end
  end
end
597
## looks for gpg signed (but not encrypted) inline messages inside the
## message body (there is no extra header for inline GPG) or for encrypted
## (and possible signed) inline GPG messages.
##
## body          the decoded body text
## encoding_to   target encoding for the signed-message case
## encoding_from encoding the body is currently in
##
## returns an array of chunks (text before, crypto result, payload,
## text after) or nil when the body holds no inline GPG block.
def inline_gpg_to_chunks body, encoding_to, encoding_from
  lines = body.split("\n")

  # First case: Message is enclosed between
  #
  # -----BEGIN PGP SIGNED MESSAGE-----
  # and
  # -----END PGP SIGNED MESSAGE-----
  #
  # In some cases, END PGP SIGNED MESSAGE doesn't appear
  # (and may leave strange -----BEGIN PGP SIGNATURE----- ?)
  gpg = lines.between(GPG_SIGNED_START, GPG_SIGNED_END)
  # between does not check if GPG_END actually exists
  # Reference: http://permalink.gmane.org/gmane.mail.sup.devel/641
  if !gpg.empty?
    msg = RMail::Message.new
    msg.body = gpg.join("\n")

    body = body.transcode(encoding_to, encoding_from)
    lines = body.split("\n")
    sig = lines.between(GPG_SIGNED_START, GPG_SIG_START)
    startidx = lines.index(GPG_SIGNED_START)
    endidx = lines.index(GPG_SIG_END)
    before = startidx.to_i > 0 ? lines[0 .. startidx-1] : []
    after = endidx ? lines[endidx+1 .. lines.size] : []

    # sig contains BEGIN PGP SIGNED MESSAGE and BEGIN PGP SIGNATURE, so
    # we ditch them. sig may also contain the hash used by PGP (with a
    # newline), so we also skip them
    sig_start = (sig[1] =~ /^Hash:/) ? 3 : 1
    sig_end = sig.size-2
    payload = RMail::Message.new
    # slice by index range: sig_end is the index of the last payload
    # line, not a length, so the trailing delimiter stays out even when
    # the Hash: header moves the start
    payload.body = (sig[sig_start..sig_end] || []).join("\n")
    return [text_to_chunks(before, false),
            CryptoManager.verify(nil, msg, false),
            message_to_chunks(payload),
            text_to_chunks(after, false)].flatten.compact
  end

  # Second case: Message is encrypted

  gpg = lines.between(GPG_START, GPG_END)
  # between does not check if GPG_END actually exists
  if !gpg.empty? && !lines.index(GPG_END).nil?
    msg = RMail::Message.new
    msg.body = gpg.join("\n")

    startidx = lines.index(GPG_START)
    before = startidx != 0 ? lines[0 .. startidx-1] : []
    after = lines[lines.index(GPG_END)+1 .. lines.size]

    notice, sig, decryptedm = CryptoManager.decrypt msg, true
    chunks = if decryptedm # managed to decrypt
      children = message_to_chunks(decryptedm, true)
      [notice, sig].compact + children
    else
      [notice]
    end
    return [text_to_chunks(before, false),
            chunks,
            text_to_chunks(after, false)].flatten.compact
  end
end
664
## parse the lines of text into chunk objects. the heuristics here
## need tweaking in some nice manner. TODO: move these heuristics
## into the classes themselves.
##
## lines     the body text, one string per line
## encrypted whether the text comes from a decrypted message (only
##           affects how the snippet is flagged)
##
## returns an array of Chunk::Text, Chunk::Quote and Chunk::Signature
## objects. as a side effect the snippet is built from the text lines
## unless the message already had one.
def text_to_chunks lines, encrypted
  state = :text # one of :text, :quote, :block_quote, or :sig
  chunks = []
  chunk_lines = []
  nextline_index = -1
  ## must live outside the block: the lookahead below is only refreshed
  ## when needed, so its value has to survive from one iteration to the
  ## next. (a variable first assigned inside the block would start out
  ## as nil on every iteration, and blank lines inside a quote would
  ## never see the quoted line that follows them.)
  nextline = nil

  lines.each_with_index do |line, i|
    if i >= nextline_index
      # look for next nonblank line only when needed to avoid O(n²)
      # behavior on sequences of blank lines
      offset = lines[(i+1)..-1].index { |l| l !~ /^\s*$/ } # skip blank lines
      if offset
        nextline_index = offset + i + 1
        nextline = lines[nextline_index]
      else
        nextline_index = lines.length
        nextline = nil
      end
    end

    case state
    when :text
      newstate = nil

      ## the following /:$/ followed by /\w/ is an attempt to detect the
      ## start of a quote. this is split into two regexen because the
      ## original regex /\w.*:$/ had very poor behavior on long lines
      ## like ":a:a:a:a:a" that occurred in certain emails.
      if line =~ QUOTE_PATTERN || (line =~ /:$/ && line =~ /\w/ && nextline =~ QUOTE_PATTERN)
        newstate = :quote
      elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE && !lines[(i+1)..-1].index { |l| l =~ /^-- $/ }
        newstate = :sig
      elsif line =~ BLOCK_QUOTE_PATTERN && nextline !~ QUOTE_PATTERN
        newstate = :block_quote
      end

      if newstate
        chunks << Chunk::Text.new(chunk_lines) unless chunk_lines.empty?
        chunk_lines = [line]
        state = newstate
      else
        chunk_lines << line
      end

    when :quote
      newstate = nil

      ## a quote continues over quoted lines and over blank lines that
      ## are followed by another quoted line
      if line =~ QUOTE_PATTERN || (line =~ /^\s*$/ && nextline =~ QUOTE_PATTERN)
        chunk_lines << line
      elsif line =~ SIG_PATTERN && (lines.length - i) < MAX_SIG_DISTANCE
        newstate = :sig
      else
        newstate = :text
      end

      if newstate
        if chunk_lines.empty?
          # nothing
        else
          chunks << Chunk::Quote.new(chunk_lines)
        end
        chunk_lines = [line]
        state = newstate
      end

    when :block_quote, :sig
      ## both run to the end of the text
      chunk_lines << line
    end

    ## build the snippet from non-blank text lines that do not look
    ## like separators, until it reaches SNIPPET_LEN characters
    if !@have_snippet && state == :text && (@snippet.nil? || @snippet.length < SNIPPET_LEN) && line !~ /[=\*#_-]{3,}/ && line !~ /^\s*$/
      @snippet ||= ""
      @snippet += " " unless @snippet.empty?
      @snippet += line.gsub(/^\s+/, "").gsub(/[\r\n]/, "").gsub(/\s+/, " ")
      oldlen = @snippet.length
      @snippet = @snippet[0 ... SNIPPET_LEN].chomp
      @snippet += "..." if @snippet.length < oldlen
      @dirty = true unless encrypted && $config[:discard_snippets_from_encrypted_messages]
      @snippet_contains_encrypted_content = true if encrypted
    end
  end

  ## final object
  case state
  when :quote, :block_quote
    chunks << Chunk::Quote.new(chunk_lines) unless chunk_lines.empty?
  when :text
    chunks << Chunk::Text.new(chunk_lines) unless chunk_lines.empty?
  when :sig
    chunks << Chunk::Signature.new(chunk_lines) unless chunk_lines.empty?
  end
  chunks
end
759
## logs +msg+ as a warning, followed by a second warning that names
## the locations of this message
def warn_with_location(msg)
  [msg, "Message is in #{@locations}"].each { |text| warn text }
end
764 end
765
## a place a message can be read from: a source plus the
## source-specific info that identifies the message within it.
## most methods simply forward to the source, passing +info+ along.
class Location
  attr_reader :source
  attr_reader :info

  def initialize source, info
    @source = source
    @info = info
  end

  def raw_header
    source.raw_header info
  end

  def raw_message
    source.raw_message info
  end

  ## writes +labels+ back to the source if syncing is enabled and the
  ## location is valid. when the source hands back new info, it
  ## replaces the stored info and +message+ is re-synced to the index.
  ##
  ## returns true when that happened, false otherwise.
  def sync_back labels, message
    synced = false
    return synced unless sync_back_enabled? && valid?
    source.synchronize do
      new_info = source.sync_back(@info, labels)
      if new_info
        @info = new_info
        Index.sync_message message, true
        synced = true
      end
    end
    synced
  end

  ## truthy only when the source implements sync_back, the
  ## :sync_back_to_maildir option is set and the source itself has
  ## syncing enabled
  def sync_back_enabled?
    source.respond_to?(:sync_back) && $config[:sync_back_to_maildir] && source.sync_back_enabled?
  end

  ## much faster than raw_message
  def each_raw_message_line &b
    source.each_raw_message_line info, &b
  end

  def parsed_message
    source.load_message info
  end

  def fallback_date
    source.fallback_date_for_message info
  end

  def valid?
    source.valid? info
  end

  def labels?
    source.labels? info
  end

  ## two locations are equal when they refer to the same source id and
  ## the same info. anything that does not look like a location (nil,
  ## for example) is simply not equal.
  def == o
    o.respond_to?(:source) && o.respond_to?(:info) &&
      o.source.id == source.id && o.info == info
  end

  ## hash-based lookups (Hash keys, Set, Array#uniq) use eql? together
  ## with hash, so it has to agree with ==
  alias_method :eql?, :==

  def hash
    [source.id, info].hash
  end
end
830
831 end