test/test_message.rb (15832B) - raw
1 #!/usr/bin/ruby
2
3 require 'test_helper'
4 require 'sup'
5 require 'stringio'
6
7 require 'dummy_source'
8
9 module Redwood
10
11 class TestMessage < Minitest::Test
12
# Per-test fixture: create a fresh temporary directory and initialise the
# hook manager with a "hooks" path located inside it.
def setup
  @path = Dir.mktmpdir
  hooks_dir = File.join(@path, 'hooks')
  Redwood::HookManager.init(hooks_dir)
end
17
# Per-test cleanup: call HookManager.deinstantiate!, then recursively delete
# the temporary directory stored in @path.
def teardown
  Redwood::HookManager.deinstantiate!
  FileUtils.rm_r(@path)
end
22
# Loads the simple-message.eml fixture through a DummySource and checks each
# header accessor on the resulting message, followed by its one-line body.
def test_simple_message
  source = DummySource.new("sup-test://test_simple_message")
  source.messages = [ fixture_path('simple-message.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  # see how well parsing the header went
  to = sup_message.to
  assert_kind_of(Array, to)
  assert_kind_of(Person, to.first)
  assert_equal(1, to.length)

  # sup doesn't do capitalized letters in email addresses
  assert_equal("fake_receiver@localhost", to[0].email)
  assert_equal("Fake Receiver", to[0].name)

  from = sup_message.from
  assert_kind_of(Person, from)
  assert_equal("fake_sender@example.invalid", from.email)
  assert_equal("Fake Sender", from.name)

  assert_equal("Re: Test message subject", sup_message.subj)

  assert_equal("<mailto:example-subscribe@example.invalid>",
               sup_message.list_subscribe)
  assert_equal("<mailto:example-unsubscribe@example.invalid>",
               sup_message.list_unsubscribe)

  list_address = sup_message.list_address
  assert_equal("example@example.invalid", list_address.email)
  assert_equal("example", list_address.name)

  assert_equal(Time.parse("Sun, 9 Dec 2007 21:48:19 +0200"), sup_message.date)

  assert_equal("20071209194819.GA25972@example.invalid", sup_message.id)

  refs = sup_message.refs
  assert_equal(1, refs.length)
  assert_equal("E1J1Rvb-0006k2-CE@localhost.localdomain", refs[0])

  replytos = sup_message.replytos
  assert_equal(1, replytos.length)
  assert_equal("E1J1Rvb-0006k2-CE@localhost.localdomain", replytos[0])

  assert_empty(sup_message.cc)
  assert_empty(sup_message.bcc)

  assert_equal("fake_receiver@localhost", sup_message.recipient_email)

  # assert_equal takes (expected, actual): the values handed to
  # build_from_source are the expectation, the accessors are the actual.
  assert_equal(source, sup_message.source)
  assert_equal(source_info, sup_message.source_info)

  # read the message body chunks
  chunks = sup_message.load_from_source!

  # there should be only one chunk
  assert_equal(1, chunks.length)

  lines = chunks.first.lines

  # there should be only one line
  assert_equal(1, lines.length)

  assert_equal("Test message!", lines.first)
end
98
# Loads multi-part.eml and checks which kinds of chunks the body is split into.
def test_multipart_message
  src = DummySource.new("sup-test://test_multipart_message")
  src.messages = [ fixture_path('multi-part.eml') ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  msg.load_from_source!

  # calling the loader again yields the message body chunks
  parts = msg.load_from_source!

  # Expected layout is four chunks: the quoted part of the message, the
  # non-quoted part, and finally the two attachments.
  assert_equal(4, parts.length)

  expected_kinds = [
    Redwood::Chunk::Quote,
    Redwood::Chunk::Text,
    Redwood::Chunk::Attachment,
    Redwood::Chunk::Attachment
  ]
  expected_kinds.each_with_index do |kind, idx|
    assert(parts[idx].is_a?(kind))
  end

  # more detailed chunk tests belong in test_message_chunks.rb
  # (possibly not yet implemented)
end
123
# Loads missing-from-to.eml (a message lacking From/To information, per the
# fixture name) and checks that the header accessors still return usable values.
def test_broken_message_1
  source = DummySource.new("sup-test://test_broken_message_1")
  source.messages = [ fixture_path('missing-from-to.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  to = sup_message.to

  # there should be no items, since the message doesn't have any
  # recipients -- but the list itself must still not be nil
  refute_nil(to)
  assert_empty(to)

  # from will have bogus values
  from = sup_message.from
  # very basic email address check
  assert_match(/\w+@\w+\.\w{2,4}/, from.email)
  refute_nil(from.name)
end
145
# Loads no-body.eml and checks that loading yields an empty chunk list
# without raising.
def test_broken_message_2
  # The URI names this test; the original carried a copy-pasted
  # "test_broken_message_1" identifier.
  source = DummySource.new("sup-test://test_broken_message_2")
  source.messages = [ fixture_path('no-body.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  # read the message body chunks: no errors should reach this level
  chunks = sup_message.load_from_source!

  assert_empty(chunks)
end
160
# Loads multi-part-2.eml and expects exactly one chunk, an attachment.
def test_multipart_message_2
  src = DummySource.new("sup-test://test_multipart_message_2")
  src.messages = [ fixture_path('multi-part-2.eml') ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  msg.load_from_source!

  # the second call returns the message body chunks
  parts = msg.load_from_source!

  assert_equal(1, parts.length)
  only_part = parts[0]
  assert(only_part.is_a?(Redwood::Chunk::Attachment))
end
174
# Loads text-attachments-with-charset.eml and checks the decoded lines of each
# text attachment against the expected UTF-8 text.
def test_text_attachment_decoding
  src = DummySource.new("sup-test://test_text_attachment_decoding")
  src.messages = [ fixture_path('text-attachments-with-charset.eml') ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  msg.load_from_source!

  parts = msg.load_from_source!
  assert_equal(7, parts.length)
  assert(parts[0].is_a?(Redwood::Chunk::Text))

  # chunk index => expected decoded lines, checked in order
  expected_lines = {
    ## The first attachment declares charset=us-ascii
    1 => ["This is ASCII"],
    ## The second attachment declares charset=koi8-r and has some Cyrillic
    2 => ["\u041f\u0440\u0438\u0432\u0435\u0442"],
    ## The third attachment declares charset=utf-8 and has an emoji
    3 => ["\u{1f602}"],
    ## The fourth attachment declares no charset and has a non-ASCII byte,
    ## which will be replaced with U+FFFD REPLACEMENT CHARACTER
    4 => ["Embedded\ufffdgarbage"],
    ## The fifth attachment has an invalid charset, which should still
    ## be handled gracefully
    5 => ["Example invalid charset"],
    ## The sixth attachment is UTF-7 encoded
    6 => ["This is ✨UTF-7✨"]
  }

  expected_lines.each do |idx, lines|
    assert(parts[idx].is_a?(Redwood::Chunk::Attachment))
    assert_equal(lines, parts[idx].lines)
  end
end
207
# Loads mailing-list-header.eml and checks the list-related accessors:
# no subscribe value, an https unsubscribe link, and the list address.
def test_mailing_list_header
  source = DummySource.new("sup-test://test_mailing_list_header")
  source.messages = [ fixture_path('mailing-list-header.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  sup_message.load_from_source!

  # assert_nil reports the offending value on failure, unlike assert(x.nil?)
  assert_nil(sup_message.list_subscribe)
  assert_equal("<https://lists.openembedded.org/g/openembedded-devel/unsub>",
               sup_message.list_unsubscribe)
  assert_equal("openembedded-devel@lists.openembedded.org", sup_message.list_address.email)
  assert_equal("openembedded-devel", sup_message.list_address.name)
end
222
# Loads blank-header-fields.eml and checks two header fields whose first
# line was blank.
def test_blank_header_lines
  src = DummySource.new("sup-test://test_blank_header_lines")
  src.messages = [ fixture_path('blank-header-fields.eml') ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  msg.load_from_source!

  # The message ID must have been parsed.
  assert_equal("D3C12B2AD838B44DA9D6B2CA334246D011E72A73A4@PA-EXMBX04.widget.com", msg.id)

  # Another header field whose first line was blank.
  expected_unsubscribe =
    "<http://mailman2.widget.com/mailman/listinfo/monitor-list>,\n\t" +
    "<mailto:monitor-list-request@widget.com?subject=unsubscribe>"
  assert_equal(expected_unsubscribe, msg.list_unsubscribe)
end
242
# Loads rfc2047-header-encoding.eml and checks the resulting subject and
# sender name strings.
def test_rfc2047_header_encoding
  src = DummySource.new("sup-test://test_rfc2047_header_encoding")
  src.messages = [ fixture_path("rfc2047-header-encoding.eml") ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  msg.load_from_source!

  expected_subject =
    "Hans Martin Djupvik, Ingrid Bø, Ирина Сидорова, " +
    "Jesper Berg, Frida Engø " +
    "bad: =?UTF16?q?badcharsetname?==?US-ASCII?b?/w?=" +
    "=?UTF-7?Q?=41=6D=65=72=69=63=61=E2=80=99=73?="
  assert_equal(expected_subject, msg.subj)

  sender_name = msg.from.name
  assert_equal("=?utf-8?q?YouTube-tj=E4nst?=", sender_name)
end
258
# Headers sometimes carry invalid high bytes (typically from spammers);
# each such byte is expected to come out as U+FFFD REPLACEMENT CHARACTER.
def test_nonascii_header
  src = DummySource.new("sup-test://test_nonascii_header")
  src.messages = [ fixture_path("non-ascii-header.eml") ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  msg.load_from_source!

  sender = msg.from
  assert_equal("SPAM \ufffd", sender.name)

  sender = msg.from
  assert_equal("spammer@example.com", sender.email)

  assert_equal("spam \ufffd spam", msg.subj)
end
273
# RFC6532 permits UTF-8 in header values; the subject must come back as a
# UTF-8 string with the expected text.
def test_utf8_header
  src = DummySource.new("sup-test://test_utf8_header")
  src.messages = [ fixture_path("utf8-header.eml") ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  msg.load_from_source!

  subject_encoding = msg.subj.encoding
  assert_equal(Encoding::UTF_8, subject_encoding)

  subject_text = msg.subj
  assert_equal("LibraryThing: State of the Thing — January", subject_text)
end
286
# Loads non-ascii-header-in-nested-message.eml and checks the three chunks:
# outer text, the enclosed message's header summary, and the enclosed body.
def test_nonascii_header_in_nested_message
  src = DummySource.new("sup-test://test_nonascii_header_in_nested_message")
  src.messages = [ fixture_path("non-ascii-header-in-nested-message.eml") ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  parts = msg.load_from_source!

  assert_equal(3, parts.length)

  assert(parts[0].is_a?(Redwood::Chunk::Text))

  # Header summary of the enclosed message; index 2 is not checked here.
  assert(parts[1].is_a?(Redwood::Chunk::EnclosedMessage))
  assert_equal(4, parts[1].lines.length)
  {
    0 => "From: SPAM \ufffd <spammer@example.com>",
    1 => "To: enclosed <enclosed@example.invalid>",
    3 => "Subject: spam \ufffd spam"
  }.each do |line_no, text|
    assert_equal(text, parts[1].lines[line_no])
  end

  # Body of the enclosed message.
  assert(parts[2].is_a?(Redwood::Chunk::Text))
  assert_equal(1, parts[2].lines.length)
  assert_equal("This is a spam.", parts[2].lines[0])
end
309
# Loads embedded-message.eml and checks the outer headers, the outer text,
# the enclosed message's header summary and the enclosed body text.
def test_embedded_message
  src = DummySource.new("sup-test://test_embedded_message")
  src.messages = [ fixture_path("embedded-message.eml") ]
  offset = 0

  msg = Message.build_from_source(src, offset)

  parts = msg.load_from_source!
  assert_equal(3, parts.length)

  # Outer message headers.
  assert_equal("sender@example.com", msg.from.email)
  assert_equal("Sender", msg.from.name)
  assert_equal(1, msg.to.length)
  assert_equal("recipient@example.invalid", msg.to[0].email)
  assert_equal("recipient", msg.to[0].name)
  assert_equal("Email with embedded message", msg.subj)

  # Outer message text.
  assert(parts[0].is_a?(Redwood::Chunk::Text))
  [ "Example outer message.", "Example second line." ].each_with_index do |text, idx|
    assert_equal(text, parts[0].lines[idx])
  end

  # Header summary of the enclosed message.
  assert(parts[1].is_a?(Redwood::Chunk::EnclosedMessage))
  assert_equal(4, parts[1].lines.length)
  assert_equal("From: Embed sender <embed@example.com>", parts[1].lines[0])
  assert_equal("To: rcpt2 <rcpt2@example.invalid>", parts[1].lines[1])
  # The date is compared as a parsed time rather than as literal text.
  assert_equal("Date: ", parts[1].lines[2][0..5])
  expected_time = Time.rfc2822("Wed, 15 Jul 2020 12:34:56 +0000")
  actual_time = Time.rfc2822(parts[1].lines[2][6..-1])
  assert_equal(expected_time, actual_time)
  assert_equal("Subject: Embedded subject line", parts[1].lines[3])

  # Body of the enclosed message.
  assert(parts[2].is_a?(Redwood::Chunk::Text))
  assert_equal(2, parts[2].lines.length)
  assert_equal("Example embedded message.", parts[2].lines[0])
  assert_equal("Second line.", parts[2].lines[1])
end
347
# Loads embedded-message-rfc6532.eml, whose enclosed message has UTF-8 in its
# headers, and checks the outer subject plus all three chunks.
def test_embedded_message_rfc6532
  src = DummySource.new("sup-test://test_embedded_message_rfc6532")
  src.messages = [ fixture_path("embedded-message-rfc6532.eml") ]

  msg = Message.build_from_source(src, 0)

  parts = msg.load_from_source!
  assert_equal(3, parts.length)

  assert_equal("Email with embedded message", msg.subj)

  # Outer message text.
  assert(parts[0].is_a?(Redwood::Chunk::Text))
  assert_equal("Example outer message.", parts[0].lines[0])

  # Header summary of the enclosed message.
  assert(parts[1].is_a?(Redwood::Chunk::EnclosedMessage))
  assert_equal(4, parts[1].lines.length)
  assert_equal("From: Embed sender <embed@example.com>", parts[1].lines[0])
  assert_equal("To: rcpt2 <rcpt2@example.invalid>", parts[1].lines[1])
  # The date is compared as a parsed time rather than as literal text.
  assert_equal("Date: ", parts[1].lines[2][0..5])
  expected_time = Time.rfc2822("Sun, 12 May 2024 17:34:29 +1000")
  actual_time = Time.rfc2822(parts[1].lines[2][6..-1])
  assert_equal(expected_time, actual_time)
  assert_equal("Subject: Embedded subject line with emoji ✨", parts[1].lines[3])

  # Body of the enclosed message.
  assert(parts[2].is_a?(Redwood::Chunk::Text))
  assert_equal("Example embedded message, with UTF-8 headers.", parts[2].lines[0])
end
376
# Loads malicious-attachment-names.eml and checks that the safe filename of
# the chunk at index 3 contains no directory component.
def test_malicious_attachment_names
  # The URI names this test; the original carried a copy-pasted
  # "test_blank_header_lines" identifier.
  source = DummySource.new("sup-test://test_malicious_attachment_names")
  source.messages = [ fixture_path('malicious-attachment-names.eml') ]
  source_info = 0

  sup_message = Message.build_from_source(source, source_info)
  chunks = sup_message.load_from_source!

  # See if attachment filenames can be safely used for saving.
  # We do that by verifying that folder-related characters (/ or \)
  # are not interpreted: the filename must not be interpreted as a
  # path, i.e. it must already be equal to its own basename.
  fn = chunks[3].safe_filename
  # assert_equal takes (expected, actual)
  assert_equal(File.basename(fn), fn)
end
392
# Loads invalid-date.eml from a source that has a fallback date set, and
# expects the message date to equal that fallback.
def test_invalid_date_header
  expected_date = Time.utc(2024, 5, 12, 15, 5, 56)

  src = DummySource.new("sup-test://test_invalid_date_header")
  src.messages = [ fixture_path("invalid-date.eml") ]
  src.fallback_date = expected_date

  msg = Message.build_from_source(src, 0)
  msg.load_from_source!

  actual_date = msg.date
  assert_equal(expected_date, actual_date)
end
404
405 # TODO: test different error cases, malformed messages etc.
406
407 # TODO: test different quoting styles, see that they are all divided
408 # to chunks properly
409
# Zimbra emits an Outlook-style "Original Message" delimiter and *also*
# prefixes every quoted line with a > marker. That works until the sender
# replies below the quote. Here only the > markers should decide where the
# quoted chunk ends.
def test_zimbra_quote_with_bottom_post
  src = DummySource.new("sup-test://test_zimbra_quote_with_bottom_post")
  src.messages = [ fixture_path('zimbra-quote-with-bottom-post.eml') ]
  offset = 0

  msg = Message.build_from_source(src, offset)
  parts = msg.load_from_source!

  assert_equal(3, parts.length)

  # TODO this chunk should ideally be part of the quote chunk after it.
  delimiter_part = parts[0]
  assert(delimiter_part.is_a?(Redwood::Chunk::Text))
  assert_equal(1, delimiter_part.lines.length)
  assert_equal("----- Original Message -----", delimiter_part.lines.first)

  assert(parts[1].is_a?(Redwood::Chunk::Quote))

  reply_part = parts[2]
  assert(reply_part.is_a?(Redwood::Chunk::Text))
  assert_equal(3, reply_part.lines.length)
  assert_equal("This is the reply from the Zimbra user.",
               reply_part.lines[2])
end
437 end
438
439 end
440
441 # vim:noai:ts=2:sw=2: