commit d84d6f266e37cae8113099a5297e898585f41643
parent 283a8fe5a5f6c362452eb89e9e6f007ec51e822c
Author: Dan Callaghan <djc@djc.id.au>
Date: Thu, 5 May 2022 11:09:05 +1000
handle malformed UTF-7 in RFC2047 words
Turns out spammers are not very good at following standards. The code
for handling UTF-7 added in commit 9b4d42a2 would crash like this:
--- Encoding::UndefinedConversionError from thread: poll after loading inbox
"\xE2" from ASCII-8BIT to UTF-8
/home/dan/sup/lib/sup/rfc2047.rb:58:in `encode'
/home/dan/sup/lib/sup/rfc2047.rb:58:in `block in decode_to'
/home/dan/sup/lib/sup/rfc2047.rb:31:in `gsub'
/home/dan/sup/lib/sup/rfc2047.rb:31:in `decode_to'
/home/dan/sup/lib/sup/message.rb:77:in `decode_header_field'
/home/dan/sup/lib/sup/message.rb:81:in `block in parse_header'
/home/dan/sup/lib/sup/util.rb:648:in `[]'
/home/dan/sup/lib/sup/message.rb:117:in `parse_header'
/home/dan/sup/lib/sup/message.rb:270:in `load_from_source!'
[...]
when given an RFC2047-encoded word which claims to be UTF-7 but contains
high (non-ASCII) bytes, which are never valid in a 7-bit encoding such as
UTF-7, as in a spam subject line I recently received.
Diffstat:
3 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/lib/sup/rfc2047.rb b/lib/sup/rfc2047.rb
@@ -53,11 +53,17 @@ module Rfc2047
# Handle UTF-7 specially because Ruby doesn't actually support it as
# a normal character encoding.
- next text.decode_utf7.encode(target) if charset == 'UTF-7'
+ if charset == 'UTF-7'
+ begin
+ next text.decode_utf7.encode(target)
+ rescue ArgumentError, EncodingError
+ next word
+ end
+ end
begin
text.force_encoding(charset).encode(target)
- rescue ArgumentError, Encoding::InvalidByteSequenceError
+ rescue ArgumentError, EncodingError
word
end
end
diff --git a/test/fixtures/rfc2047-header-encoding.eml b/test/fixtures/rfc2047-header-encoding.eml
@@ -7,6 +7,7 @@ Subject:
=?UTF-16?b?//4sACAASgBlAHMAcABlAHIAIABCAGUAcgBnAA?=
=?UTF-7?b?LCBGcmlkYSBFbmcrQVBnLQ?=
bad: =?UTF16?q?badcharsetname?= =?US-ASCII?b?/w?=
+ =?UTF-7?Q?=41=6D=65=72=69=63=61=E2=80=99=73?=
The subject header contains various RFC2047 encoded words.
For completeness we test both base64 and quoted-printable, and some
diff --git a/test/test_message.rb b/test/test_message.rb
@@ -249,7 +249,8 @@ class TestMessage < Minitest::Test
assert_equal("Hans Martin Djupvik, Ingrid Bø, Ирина Сидорова, " +
"Jesper Berg, Frida Engø " +
- "bad: =?UTF16?q?badcharsetname?==?US-ASCII?b?/w?=",
+ "bad: =?UTF16?q?badcharsetname?==?US-ASCII?b?/w?=" +
+ "=?UTF-7?Q?=41=6D=65=72=69=63=61=E2=80=99=73?=",
sup_message.subj)
end