lib/sup/rfc2047.rb (2470B) - raw
1 ## from: http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/101949
2
3 # $Id: rfc2047.rb,v 1.4 2003/04/18 20:55:56 sam Exp $
4 # MODIFIED slightly by William Morgan
5 #
6 # An implementation of RFC 2047 decoding.
7 #
8 # This module depends on the iconv library by Nobuyoshi Nakada, which I've
9 # heard may be distributed as a standard part of Ruby 1.8. Many thanks to him
10 # for helping with building and using iconv.
11 #
12 # Thanks to "Josef 'Jupp' Schugt" <jupp / gmx.de> for pointing out an error with
13 # stateful character sets.
14 #
15 # Copyright (c) Sam Roberts <sroberts / uniserve.com> 2004
16 #
17 # This file is distributed under the same terms as Ruby.
18
# Decoding of RFC 2047 "encoded words" (=?charset?encoding?text?=), as found
# in MIME message headers.
module Rfc2047
  # A single encoded word. Captures: 1 = charset (optionally followed by an
  # RFC 2231 "*language" suffix), 2 = encoding (B or Q, either case),
  # 3 = the encoded text.
  WORD = %r{=\?([!\#$%&'*+-/0-9A-Z\\^\`a-z{|}~]+)\?([BbQq])\?([!->@-~ ]+)\?=} # :nodoc: 'stupid ruby-mode

  # An encoded word followed by whitespace and then another encoded word.
  # Capture 1 is the whole first word; the second word is only looked ahead
  # at, so that runs of three or more words are handled by a single gsub.
  WORDSEQ = %r{(#{WORD.source})\s+(?=#{WORD.source})}

  # Tells whether +s+ contains at least one RFC 2047 encoded word.
  #
  # Returns the index of the first encoded word (truthy, possibly 0), or nil
  # if there is none.
  def self.is_encoded?(s)
    s =~ WORD
  end

  # Decodes a string, +from+, containing RFC 2047 encoded words into a target
  # character set, +target+. +target+ may be anything String#encode accepts
  # as a destination encoding. If one of the encoded words cannot be
  # converted to the target encoding (unknown charset, invalid byte
  # sequence, ...), it is left in its encoded form.
  #
  # Whitespace that separates two adjacent encoded words is dropped. A
  # language tag on the charset, as in "=?US-ASCII*EN?Q?...?=" (RFC 2231),
  # is accepted and ignored.
  #
  # Returns a new string; +from+ itself is not modified.
  def self.decode_to(target, from)
    # Whitespace between adjacent encoded words is not part of the text.
    joined = from.gsub(WORDSEQ, '\1')

    joined.gsub(WORD) do |word|
      # Grab the captures before any other regexp operation clobbers them.
      charset, encoding, text = Regexp.last_match.captures

      # RFC 2231 allows "charset*language"; only the charset part names an
      # encoding, so drop everything from the first '*' onwards.
      charset = charset[/\A[^*]*/]

      # B64 or QP decode, as necessary. No else branch is needed because
      # WORD cannot match any other encoding letter.
      text =
        case encoding
        when 'b', 'B'
          ## Padding is optional in RFC 2047 words. Add some extra padding
          ## before decoding the base64, otherwise on Ruby 2.0 the final
          ## byte might be discarded.
          (text + '===').unpack('m*')[0]
        when 'q', 'Q'
          # RFC 2047 has a variant of quoted printable where a ' ' character
          # can be represented as an '_', so convert any of these that we
          # find before doing the QP decoding.
          text.tr('_', ' ').unpack('M*')[0]
        end

      begin
        if charset.casecmp('UTF-7') == 0
          # Handle UTF-7 specially because Ruby doesn't actually support it
          # as a normal character encoding. Charset names are
          # case-independent, hence casecmp.
          # NOTE(review): String#decode_utf7 is not defined in this file;
          # presumably it is provided elsewhere in the project -- confirm.
          text.decode_utf7.encode(target)
        else
          text.force_encoding(charset).encode(target)
        end
      rescue ArgumentError, EncodingError
        # Unknown charset or unconvertible text: keep the word as it was.
        word
      end
    end
  end
end