test/test_message.rb (15761B) - raw
1 #!/usr/bin/ruby
2
3 require 'test_helper'
4 require 'sup'
5 require 'stringio'
6
7 require 'dummy_source'
8
9 module Redwood
10
11 class TestMessage < Minitest::Test
12
# Creates a scratch directory for the test and initialises the hook manager
# with a "hooks" path located inside that directory.
def setup
  @path = Dir.mktmpdir
  hooks_dir = File.join(@path, 'hooks')
  Redwood::HookManager.init(hooks_dir)
end
17
# Deinstantiates the hook manager and deletes the scratch directory stored
# in @path.
def teardown
  Redwood::HookManager.deinstantiate!
ensure
  # Remove the scratch directory even when the hook manager teardown raised,
  # so a failing run does not leave directories behind. @path is nil when no
  # directory was ever created, in which case there is nothing to delete.
  FileUtils.rm_r @path if @path
end
22
# Loads simple-message.eml through a DummySource and checks each header
# accessor of the resulting message, followed by its single-line body.
def test_simple_message
  source = DummySource.new("sup-test://test_simple_message")
  source.messages = [ fixture_path('simple-message.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  # see how well parsing the header went
  to = sup_message.to
  assert_kind_of(Array, to)
  assert_kind_of(Person, to.first)
  assert_equal(1, to.length)

  # sup doesn't do capitalized letters in email addresses
  assert_equal("fake_receiver@localhost", to[0].email)
  assert_equal("Fake Receiver", to[0].name)

  from = sup_message.from
  assert_kind_of(Person, from)
  assert_equal("fake_sender@example.invalid", from.email)
  assert_equal("Fake Sender", from.name)

  subj = sup_message.subj
  assert_equal("Re: Test message subject", subj)

  list_subscribe = sup_message.list_subscribe
  assert_equal("<mailto:example-subscribe@example.invalid>", list_subscribe)

  list_unsubscribe = sup_message.list_unsubscribe
  assert_equal("<mailto:example-unsubscribe@example.invalid>", list_unsubscribe)

  list_address = sup_message.list_address
  assert_equal("example@example.invalid", list_address.email)
  assert_equal("example", list_address.name)

  date = sup_message.date
  assert_equal(Time.parse("Sun, 9 Dec 2007 21:48:19 +0200"), date)

  id = sup_message.id
  assert_equal("20071209194819.GA25972@example.invalid", id)

  refs = sup_message.refs
  assert_equal(1, refs.length)
  assert_equal("E1J1Rvb-0006k2-CE@localhost.localdomain", refs[0])

  replytos = sup_message.replytos
  assert_equal(1, replytos.length)
  assert_equal("E1J1Rvb-0006k2-CE@localhost.localdomain", replytos[0])

  assert_empty(sup_message.cc)
  assert_empty(sup_message.bcc)

  recipient_email = sup_message.recipient_email
  assert_equal("fake_receiver@localhost", recipient_email)

  # The message must report the source and source info it was built from.
  # Expected value goes first so failure output labels the values correctly.
  assert_equal(source, sup_message.source)
  assert_equal(source_info, sup_message.source_info)

  # read the message body chunks
  chunks = sup_message.load_from_source!

  # there should be only one chunk
  assert_equal(1, chunks.length)

  lines = chunks.first.lines

  # there should be only one line
  assert_equal(1, lines.length)

  assert_equal("Test message!", lines.first)
end
98
# Loads multi-part.eml and checks the number, order and types of the body
# chunks.
def test_multipart_message
  source = DummySource.new("sup-test://test_multipart_message")
  source.messages = [ fixture_path('multi-part.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  # read the message body chunks
  chunks = sup_message.load_from_source!

  # this time there should be four chunks: first the quoted part of
  # the message, then the non-quoted part, then the two attachments
  assert_equal(4, chunks.length)

  # assert_kind_of reports the actual class on failure, unlike assert(is_a?)
  assert_kind_of(Redwood::Chunk::Quote, chunks[0])
  assert_kind_of(Redwood::Chunk::Text, chunks[1])
  assert_kind_of(Redwood::Chunk::Attachment, chunks[2])
  assert_kind_of(Redwood::Chunk::Attachment, chunks[3])

  # further testing of chunks will happen in test_message_chunks.rb
  # (possibly not yet implemented)
end
123
# Loads missing-from-to.eml and checks that the recipient list is an empty,
# non-nil collection and that the sender still has an email-like address and
# a name.
def test_broken_message_1
  source = DummySource.new("sup-test://test_broken_message_1")
  source.messages = [ fixture_path('missing-from-to.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  to = sup_message.to

  # there should be no items, since the message doesn't have any
  # recipients -- still not nil
  refute_nil(to)
  assert_empty(to)

  # from will have bogus values
  from = sup_message.from
  # very basic email address check
  assert_match(/\w+@\w+\.\w{2,4}/, from.email)
  refute_nil(from.name)
end
145
# Loads no-body.eml and checks that loading yields an empty chunk list
# rather than raising.
def test_broken_message_2
  # The source URI names this test (it previously reused the URI of
  # test_broken_message_1).
  source = DummySource.new("sup-test://test_broken_message_2")
  source.messages = [ fixture_path('no-body.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  # read the message body chunks: no errors should reach this level
  chunks = sup_message.load_from_source!

  assert_empty(chunks)
end
160
# Loads multi-part-2.eml and checks that it produces exactly one chunk, an
# attachment.
def test_multipart_message_2
  source = DummySource.new("sup-test://test_multipart_message_2")
  source.messages = [ fixture_path('multi-part-2.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  chunks = sup_message.load_from_source! # read the message body chunks

  assert_equal(1, chunks.length)
  # assert_kind_of reports the actual class on failure, unlike assert(is_a?)
  assert_kind_of(Redwood::Chunk::Attachment, chunks[0])
end
174
# Loads text-attachments-with-charset.eml and checks the decoded lines of
# each text attachment against the expected UTF-8 strings.
def test_text_attachment_decoding
  source = DummySource.new("sup-test://test_text_attachment_decoding")
  source.messages = [ fixture_path('text-attachments-with-charset.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  chunks = sup_message.load_from_source!
  assert_equal(7, chunks.length)
  assert_kind_of(Redwood::Chunk::Text, chunks[0])
  ## The first attachment declares charset=us-ascii
  assert_kind_of(Redwood::Chunk::Attachment, chunks[1])
  assert_equal(["This is ASCII"], chunks[1].lines)
  ## The second attachment declares charset=koi8-r and has some Cyrillic
  assert_kind_of(Redwood::Chunk::Attachment, chunks[2])
  assert_equal(["\u041f\u0440\u0438\u0432\u0435\u0442"], chunks[2].lines)
  ## The third attachment declares charset=utf-8 and has an emoji
  assert_kind_of(Redwood::Chunk::Attachment, chunks[3])
  assert_equal(["\u{1f602}"], chunks[3].lines)
  ## The fourth attachment declares no charset and has a non-ASCII byte,
  ## which will be replaced with U+FFFD REPLACEMENT CHARACTER
  assert_kind_of(Redwood::Chunk::Attachment, chunks[4])
  assert_equal(["Embedded\ufffdgarbage"], chunks[4].lines)
  ## The fifth attachment has an invalid charset, which should still
  ## be handled gracefully
  assert_kind_of(Redwood::Chunk::Attachment, chunks[5])
  assert_equal(["Example invalid charset"], chunks[5].lines)
  ## The sixth attachment is UTF-7 encoded
  assert_kind_of(Redwood::Chunk::Attachment, chunks[6])
  assert_equal(["This is ✨UTF-7✨"], chunks[6].lines)
end
207
# Loads mailing-list-header.eml and checks the list-related accessors: no
# subscribe value, an unsubscribe URL, and the list address.
def test_mailing_list_header
  source = DummySource.new("sup-test://test_mailing_list_header")
  source.messages = [ fixture_path('mailing-list-header.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  # assert_nil prints the unexpected value on failure, unlike assert(x.nil?)
  assert_nil(sup_message.list_subscribe)
  assert_equal("<https://lists.openembedded.org/g/openembedded-devel/unsub>",
               sup_message.list_unsubscribe)
  assert_equal("openembedded-devel@lists.openembedded.org", sup_message.list_address.email)
  assert_equal("openembedded-devel", sup_message.list_address.name)
end
222
# Loads blank-header-fields.eml and checks two header values: the message
# ID and a List-Unsubscribe value whose first line was blank.
def test_blank_header_lines
  dummy = DummySource.new("sup-test://test_blank_header_lines")
  dummy.messages = [ fixture_path('blank-header-fields.eml') ]

  message = Message.build_from_source(dummy, 0)
  message.load_from_source!

  # The message ID must come through intact.
  assert_equal("D3C12B2AD838B44DA9D6B2CA334246D011E72A73A4@PA-EXMBX04.widget.com", message.id)

  # A second header field whose first line was blank; the expected value
  # keeps the line break and tab between its two entries.
  expected_unsubscribe = "<http://mailman2.widget.com/mailman/listinfo/monitor-list>,\n\t" +
                         "<mailto:monitor-list-request@widget.com?subject=unsubscribe>"
  assert_equal(expected_unsubscribe, message.list_unsubscribe)
end
242
# Loads rfc2047-header-encoding.eml and compares the subject with the
# expected string. The expected value includes some encoded-word sequences
# verbatim (the part following "bad:").
def test_rfc2047_header_encoding
  dummy = DummySource.new("sup-test://test_rfc2047_header_encoding")
  dummy.messages = [ fixture_path("rfc2047-header-encoding.eml") ]

  message = Message.build_from_source(dummy, 0)
  message.load_from_source!

  expected_subject = [
    "Hans Martin Djupvik, Ingrid Bø, Ирина Сидорова, ",
    "Jesper Berg, Frida Engø ",
    "bad: =?UTF16?q?badcharsetname?==?US-ASCII?b?/w?=",
    "=?UTF-7?Q?=41=6D=65=72=69=63=61=E2=80=99=73?="
  ].join
  assert_equal(expected_subject, message.subj)
end
257
# Loads non-ascii-header.eml and checks the sender and subject.
def test_nonascii_header
  ## Spammers sometimes send invalid high bytes in the headers.
  ## They will be replaced with U+FFFD REPLACEMENT CHARACTER.
  dummy = DummySource.new("sup-test://test_nonascii_header")
  dummy.messages = [ fixture_path("non-ascii-header.eml") ]

  message = Message.build_from_source(dummy, 0)
  message.load_from_source!

  sender = message.from
  assert_equal("SPAM \ufffd", sender.name)
  assert_equal("spammer@example.com", message.from.email)
  assert_equal("spam \ufffd spam", message.subj)
end
272
# Loads utf8-header.eml and checks that the subject is a UTF-8 string with
# the expected text.
def test_utf8_header
  ## UTF-8 is allowed in header values according to RFC6532.
  dummy = DummySource.new("sup-test://test_utf8_header")
  dummy.messages = [ fixture_path("utf8-header.eml") ]

  message = Message.build_from_source(dummy, 0)
  message.load_from_source!

  assert_equal(Encoding::UTF_8, message.subj.encoding)
  assert_equal("LibraryThing: State of the Thing — January", message.subj)
end
285
# Loads non-ascii-header-in-nested-message.eml and checks the three chunks:
# outer text, the enclosed message's header lines (with U+FFFD where the
# expected strings contain it), and the enclosed message's body.
def test_nonascii_header_in_nested_message
  source = DummySource.new("sup-test://test_nonascii_header_in_nested_message")
  source.messages = [ fixture_path("non-ascii-header-in-nested-message.eml") ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  chunks = sup_message.load_from_source!

  assert_equal(3, chunks.length)

  # assert_kind_of reports the actual class on failure, unlike assert(is_a?)
  assert_kind_of(Redwood::Chunk::Text, chunks[0])

  assert_kind_of(Redwood::Chunk::EnclosedMessage, chunks[1])
  assert_equal(4, chunks[1].lines.length)
  assert_equal("From: SPAM \ufffd <spammer@example.com>", chunks[1].lines[0])
  assert_equal("To: enclosed <enclosed@example.invalid>", chunks[1].lines[1])
  assert_equal("Subject: spam \ufffd spam", chunks[1].lines[3])

  assert_kind_of(Redwood::Chunk::Text, chunks[2])
  assert_equal(1, chunks[2].lines.length)
  assert_equal("This is a spam.", chunks[2].lines[0])
end
308
# Loads embedded-message.eml and checks the outer headers, the outer text,
# the enclosed message's header lines, and the enclosed message's body.
def test_embedded_message
  source = DummySource.new("sup-test://test_embedded_message")
  source.messages = [ fixture_path("embedded-message.eml") ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)

  chunks = sup_message.load_from_source!
  assert_equal(3, chunks.length)

  assert_equal("sender@example.com", sup_message.from.email)
  assert_equal("Sender", sup_message.from.name)
  assert_equal(1, sup_message.to.length)
  assert_equal("recipient@example.invalid", sup_message.to[0].email)
  assert_equal("recipient", sup_message.to[0].name)
  assert_equal("Email with embedded message", sup_message.subj)

  # assert_kind_of reports the actual class on failure, unlike assert(is_a?)
  assert_kind_of(Redwood::Chunk::Text, chunks[0])
  assert_equal("Example outer message.", chunks[0].lines[0])
  assert_equal("Example second line.", chunks[0].lines[1])

  assert_kind_of(Redwood::Chunk::EnclosedMessage, chunks[1])
  assert_equal(4, chunks[1].lines.length)
  assert_equal("From: Embed sender <embed@example.com>", chunks[1].lines[0])
  assert_equal("To: rcpt2 <rcpt2@example.invalid>", chunks[1].lines[1])
  # The date line is compared as a parsed time rather than as text, so the
  # check does not depend on how the date was formatted.
  assert_equal("Date: ", chunks[1].lines[2][0..5])
  assert_equal(
    Time.rfc2822("Wed, 15 Jul 2020 12:34:56 +0000"),
    Time.rfc2822(chunks[1].lines[2][6..-1])
  )
  assert_equal("Subject: Embedded subject line", chunks[1].lines[3])

  assert_kind_of(Redwood::Chunk::Text, chunks[2])
  assert_equal(2, chunks[2].lines.length)
  assert_equal("Example embedded message.", chunks[2].lines[0])
  assert_equal("Second line.", chunks[2].lines[1])
end
346
# Loads embedded-message-rfc6532.eml and checks the outer subject and text,
# the enclosed message's header lines (including a subject containing an
# emoji), and the first line of the enclosed message's body.
def test_embedded_message_rfc6532
  source = DummySource.new("sup-test://test_embedded_message_rfc6532")
  source.messages = [ fixture_path("embedded-message-rfc6532.eml") ]

  sup_message = Message.build_from_source(source, 0)

  chunks = sup_message.load_from_source!
  assert_equal(3, chunks.length)

  assert_equal("Email with embedded message", sup_message.subj)

  # assert_kind_of reports the actual class on failure, unlike assert(is_a?)
  assert_kind_of(Redwood::Chunk::Text, chunks[0])
  assert_equal("Example outer message.", chunks[0].lines[0])

  assert_kind_of(Redwood::Chunk::EnclosedMessage, chunks[1])
  assert_equal(4, chunks[1].lines.length)
  assert_equal("From: Embed sender <embed@example.com>", chunks[1].lines[0])
  assert_equal("To: rcpt2 <rcpt2@example.invalid>", chunks[1].lines[1])
  # The date line is compared as a parsed time rather than as text, so the
  # check does not depend on how the date was formatted.
  assert_equal("Date: ", chunks[1].lines[2][0..5])
  assert_equal(
    Time.rfc2822("Sun, 12 May 2024 17:34:29 +1000"),
    Time.rfc2822(chunks[1].lines[2][6..-1])
  )
  assert_equal("Subject: Embedded subject line with emoji ✨", chunks[1].lines[3])

  assert_kind_of(Redwood::Chunk::Text, chunks[2])
  assert_equal("Example embedded message, with UTF-8 headers.", chunks[2].lines[0])
end
375
# Loads malicious-attachment-names.eml and checks that the safe filename of
# the chunk at index 3 contains no directory component.
def test_malicious_attachment_names
  # The source URI names this test (it previously reused the URI of
  # test_blank_header_lines).
  source = DummySource.new("sup-test://test_malicious_attachment_names")
  source.messages = [ fixture_path('malicious-attachment-names.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  chunks = sup_message.load_from_source!

  # See if attachment filenames can be safely used for saving.
  # We do that by verifying that any folder-related character (/ or \)
  # are not interpreted: the filename must not be interpreted into a
  # path.
  fn = chunks[3].safe_filename
  assert_equal(fn, File.basename(fn))
end
391
# Loads invalid-date.eml from a source that has a fallback date set, and
# checks that the message reports that fallback date.
def test_invalid_date_header
  dummy = DummySource.new("sup-test://test_invalid_date_header")
  dummy.messages = [ fixture_path("invalid-date.eml") ]
  expected_date = Time.utc(2024, 5, 12, 15, 5, 56)
  dummy.fallback_date = expected_date

  message = Message.build_from_source(dummy, 0)
  message.load_from_source!

  assert_equal(expected_date, message.date)
end
403
404 # TODO: test different error cases, malformed messages etc.
405
406 # TODO: test different quoting styles, see that they are all divided
407 # to chunks properly
408
# Loads zimbra-quote-with-bottom-post.eml and checks that it splits into
# delimiter text, quote, and the reply text that follows the quote.
def test_zimbra_quote_with_bottom_post
  # Zimbra does an Outlook-style "Original Message" delimiter and then *also*
  # prefixes each quoted line with a > marker. That's okay until the sender
  # tries to do the right thing and reply after the quote.
  # In this case we want to just look at the > markers when determining where
  # the quoted chunk ends.
  source = DummySource.new("sup-test://test_zimbra_quote_with_bottom_post")
  source.messages = [ fixture_path('zimbra-quote-with-bottom-post.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  chunks = sup_message.load_from_source!

  assert_equal(3, chunks.length)

  # TODO this chunk should ideally be part of the quote chunk after it.
  # assert_kind_of reports the actual class on failure, unlike assert(is_a?)
  assert_kind_of(Redwood::Chunk::Text, chunks[0])
  assert_equal(1, chunks[0].lines.length)
  assert_equal("----- Original Message -----", chunks[0].lines.first)

  assert_kind_of(Redwood::Chunk::Quote, chunks[1])

  assert_kind_of(Redwood::Chunk::Text, chunks[2])
  assert_equal(3, chunks[2].lines.length)
  assert_equal("This is the reply from the Zimbra user.",
               chunks[2].lines[2])
end
436 end
437
438 end
439
440 # vim:noai:ts=2:sw=2: