1
|
# encoding: utf-8
|
2
|
|
3
|
module Mail
|
4
|
# Error signalling an attempt to decode an encoding type that is not known.
class UnknownEncodingType < StandardError #:nodoc:
end
|
7
|
|
8
|
module Encodings
|
9
|
|
10
|
include Mail::Patterns
|
11
|
extend Mail::Utilities
|
12
|
|
13
|
# Registry of transfer encodings: maps a normalised encoding name (as
# produced by get_name) to the class registered for it via register.
@transfer_encodings = {}
|
14
|
|
15
|
# Registers a transfer encoding class under the given name.
#
# The name is normalised with get_name before it is used as the registry
# key, so "Quoted-Printable" and :quoted_printable address the same entry.
# Returns the class that was stored.
#
# Example:
#
#   Encodings.register "base64", Mail::Encodings::Base64
def Encodings.register(name, cls)
  @transfer_encodings.store(get_name(name), cls)
end
|
23
|
|
24
|
# Reports whether an encoding has been registered under the given name.
# The name is normalised with get_name before the lookup.
#
# Example:
#
#   Encodings.defined?(:base64) #=> true (once "base64" has been registered)
def Encodings.defined?(str)
  @transfer_encodings.key?(get_name(str))
end
|
32
|
|
33
|
# Looks up the class registered for an encoding name.
#
# The name is normalised with get_name first. Returns the registered class,
# or nil when nothing has been registered under that name. Encodings become
# available here by being passed to Encodings.register, which is how
# further encodings can be added.
#
# Example:
#
#   Encodings.get_encoding(:base64) #=> Mail::Encodings::Base64
def Encodings.get_encoding(str)
  key = get_name(str)
  @transfer_encodings[key]
end
|
44
|
|
45
|
# Returns an Array holding every class currently stored in the encoding
# registry (the values of the name => class table filled by register).
def Encodings.get_all
  @transfer_encodings.values
end
|
48
|
|
49
|
# Normalises an encoding name into the key used by the encoding registry:
# the argument is converted with to_s, every "-" is replaced by "_" and the
# result is lower-cased.
#
# Example:
#
#   Encodings.get_name("Quoted-Printable") #=> "quoted_printable"
#   Encodings.get_name(:Base64)            #=> "base64"
def Encodings.get_name(enc)
  # tr is sufficient for a one-character substitution; no need to rebind enc.
  enc.to_s.tr("-", "_").downcase
end
|
52
|
|
53
|
# Prepares a parameter value for use in a header field.
#
# * ASCII-only values that match TOKEN_UNSAFE are wrapped in double quotes.
# * Any other ASCII-only value is returned unchanged.
# * Values containing non-ASCII characters are delegated to
#   RubyVer.param_encode. According to the original notes on this method,
#   that encoding uses URI escaping, takes its character set from $KCODE
#   (Ruby < 1.9) or from the string's own encoding, and its language field
#   can be configured like so:
#
#     Mail.defaults do
#       param_encode_language 'jp'
#     end
#
# Example:
#
#   Mail::Encodings.param_encode("fun") #=> "fun"
#   # assuming TOKEN_UNSAFE does not match plain letters -- confirm in Mail::Patterns
def Encodings.param_encode(str)
  return RubyVer.param_encode(str) unless str.ascii_only?

  str =~ TOKEN_UNSAFE ? %Q{"#{str}"} : str
end
|
76
|
|
77
|
# Decodes a URI-escaped parameter value by delegating to
# RubyVer.param_decode with the given encoding name.
#
# Example (results as described by the original documentation):
#
#   Mail::Encodings.param_decode("This%20is%20fun", 'us-ascii') #=> "This is fun"
#
#   str = Mail::Encodings.param_decode("This%20is%20fun", 'iso-8859-1')
#   str.encoding #=> 'ISO-8859-1'      ## Only on Ruby 1.9
#   str #=> "This is fun"
def Encodings.param_decode(str, encoding)
  RubyVer.param_decode(str, encoding)
end
|
89
|
|
90
|
# Decodes or encodes a string in the =?<encoding>?[QB]?<string>?= format,
# depending on output_type.
#
# * output_type == :decode -- the string is passed to value_decode.
# * any other output_type  -- the string is encoded: ASCII-only input is
#   returned unchanged, everything else is Base64 ("B") encoded using the
#   character set reported by find_encoding (the string's own encoding on
#   Ruby >= 1.9, $KCODE before that).
#
# Encoding therefore only ever emits Base64 encoded-words.
def Encodings.decode_encode(str, output_type)
  return Encodings.value_decode(str) if output_type == :decode
  return str if str.ascii_only?

  Encodings.b_value_encode(str, find_encoding(str))
end
|
110
|
|
111
|
# Decodes every Base64 ("B") or Quoted-Printable ("Q") encoded-word found in
# a string, leaving the surrounding plain text as it is.
#
# Encoded-words have the format =?<encoding>?[QB]?<string>?=
#
# Returns the input itself when it contains no encoded-word; otherwise a
# new string assembled from the decoded and plain pieces.
def Encodings.value_decode(str)
  # Optimization: if there are no encoded-words in the string, just return it
  return str unless str =~ /\=\?[^?]+\?[QB]\?[^?]+?\?\=/xmi

  decoded = collapse_adjacent_encodings(str).map do |line|
    # Split on white-space with a capture so the white-space itself is kept
    line.split(/([ \t])/).map do |text|
      if text.include?('=?')
        # Walk over encoded-words and the plain runs between them
        text.scan(/(                                  # Group around entire regex to include it in matches
                    \=\?[^?]+\?([QB])\?[^?]+?\?\=     # Quoted String with subgroup for encoding method
                    |                                 # or
                    .+?(?=\=\?|$)                     # Plain String
                  )/xmi).map do |string, method|
          case method
          when 'b', 'B' then b_value_decode(string)
          when 'q', 'Q' then q_value_decode(string)
          else string # plain run: the method subgroup did not take part in the match
          end
        end
      else
        text
      end
    end
  end

  decoded.flatten.join("")
end
|
146
|
|
147
|
# Decodes the encoded-words (=?<encoding>?[QB]?<string>?=) in str via
# value_decode and converts the result to to_encoding.
#
# * to_encoding nil/false, or any spelling of UTF-8 ("utf8", "UTF-8", ...):
#   the decoded text is returned as is (value_decode is expected to hand
#   back UTF-8 already).
# * otherwise the text is transcoded with String#encode on Ruby >= 1.9, or
#   with Iconv on older Rubies.
#
# If the conversion fails -- the text is not what its charset claims, or
# the target is an unknown charset such as X-UNKNOWN -- there is not much
# we can do, so the unconverted decoded text is returned.
def Encodings.unquote_and_convert_to(str, to_encoding)
  output = value_decode(str).to_s # expected to be UTF-8 at this point
  return output unless to_encoding
  return output if to_encoding.to_s.downcase.delete("-") == 'utf8'

  if RUBY_VERSION >= '1.9'
    begin
      output.encode(to_encoding)
    rescue EncodingError
      # Covers unknown converters as well as invalid/undefined conversions.
      # The Iconv error classes must not be named on this path: Iconv is
      # never loaded here, so referencing them would raise NameError.
      output
    end
  else
    require 'iconv'
    begin
      Iconv.iconv(to_encoding, 'UTF-8', output).first
    rescue Iconv::IllegalSequence, Iconv::InvalidEncoding, Errno::EINVAL
      output
    end
  end
end
|
174
|
|
175
|
# Encodes the non-US-ASCII parts of an address (or a list of addresses).
#
# * Array input: nil entries are dropped, every remaining entry is encoded
#   recursively and the results are joined with ", ".
# * nil/false input: returns nil.
# * anything else: handed to encode_non_usascii together with charset.
def Encodings.address_encode(address, charset = 'utf-8')
  unless address.is_a?(Array)
    return address ? encode_non_usascii(address, charset) : nil
  end

  encoded = address.compact.map { |entry| Encodings.address_encode(entry, charset) }
  encoded.join(", ")
end
|
184
|
|
185
|
# Base64-encodes the parts of an address string that are not US-ASCII.
#
# Returns the address untouched when it is ASCII-only or when charset is
# nil. Otherwise double-quoted sections containing a non-ASCII character
# are unquoted and encoded as a whole, and afterwards every remaining
# white-space separated word that is not ASCII-only is encoded on its own.
# The words are re-joined with single spaces.
def Encodings.encode_non_usascii(address, charset)
  return address if address.ascii_only? || charset.nil?

  us_ascii = %Q{\x00-\x7f}
  # Encode any non usascii strings embedded inside of quotes
  with_quotes_encoded = address.gsub(/(".*?[^#{us_ascii}].*?")/) do |quoted|
    Encodings.b_value_encode(unquote(quoted), charset)
  end

  # Then go through all remaining words and encode as needed
  words = with_quotes_encoded.split(/\s/)
  encoded_words = map_with_index(words) do |word, i|
    next word if word.ascii_only?

    # A word following another non-ASCII word gets a leading space inside
    # its encoded form -- presumably because white-space between adjacent
    # encoded-words is dropped again on decoding (RFC 2047); TODO confirm.
    follows_non_ascii = i > 0 && words[i - 1] && !words[i - 1].ascii_only?
    word = " #{word}" if follows_non_ascii
    Encodings.b_value_encode(word, charset)
  end

  encoded_words.join(' ')
end
|
204
|
|
205
|
# Encodes a string as Base64 encoded-words, ready to be used as a field
# value in the =?<charset>?B?<string>?= format.
#
# ASCII-only input (checked on its to_s form) is returned unchanged. The
# Base64 text and the charset label come from RubyVer.b_value_encode; each
# line of that text becomes one encoded-word and the words are joined with
# a single space.
#
# Example:
#
#   Encodings.b_value_encode('This is あ string', 'UTF-8')
#   #=> "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?="
def Encodings.b_value_encode(encoded_str, encoding = nil)
  return encoded_str if encoded_str.to_s.ascii_only?

  base64, charset = RubyVer.b_value_encode(encoded_str, encoding)
  encoded_words = map_lines(base64) { |chunk| "=?#{charset}?B?#{chunk.chomp}?=" }
  encoded_words.join(" ")
end
|
219
|
|
220
|
# Encodes a string as Quoted-Printable encoded-words, ready to be used as a
# field value in the =?<charset>?Q?<string>?= format.
#
# ASCII-only input (checked on its to_s form) is returned unchanged. The
# quoted-printable text and the charset label come from
# RubyVer.q_value_encode; each line of that text becomes one encoded-word,
# with spaces replaced by "_", and the words are joined with a single space.
#
# Example:
#
#   Encodings.q_value_encode('This is あ string', 'UTF-8')
#   #=> "=?UTF-8?Q?This_is_=E3=81=82_string?="
def Encodings.q_value_encode(encoded_str, encoding = nil)
  return encoded_str if encoded_str.to_s.ascii_only?

  quoted, charset = RubyVer.q_value_encode(encoded_str, encoding)
  # The string has already been limited to the length we want, so the
  # "=\r\n" breaks inside it are removed again (in place).
  quoted.gsub!("=\r\n", '')
  encoded_words = map_lines(quoted) do |chunk|
    "=?#{charset}?Q?#{chunk.chomp.gsub(/ /, '_')}?="
  end
  encoded_words.join(" ")
end
|
235
|
|
236
|
# NOTE: a bare `private` only changes the visibility of instance methods
# defined after it with a plain `def name`. The methods below are defined as
# `def Encodings.name` (singleton methods), so they remain publicly callable;
# read this marker as a statement of intent ("internal helpers follow").
private
|
237
|
|
238
|
# Decodes a single Base64 encoded-word, i.e. a string in the
# "=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=" format. The work is delegated to
# RubyVer.b_value_decode.
#
# Example:
#
#   Encodings.b_value_decode("=?UTF-8?B?VGhpcyBpcyDjgYIgc3RyaW5n?=")
#   #=> 'This is あ string'
def Encodings.b_value_decode(str)
  RubyVer.b_value_decode(str)
end
|
247
|
|
248
|
# Decodes a single Quoted-Printable encoded-word, i.e. a string in the
# "=?UTF-8?Q?This_is_=E3=81=82_string?=" format. The work is delegated to
# RubyVer.q_value_decode.
#
# Example:
#
#   Encodings.q_value_decode("=?UTF-8?Q?This_is_=E3=81=82_string?=")
#   #=> 'This is あ string'
def Encodings.q_value_decode(str)
  RubyVer.q_value_decode(str)
end
|
257
|
|
258
|
# Extracts the <encoding> part (the text between "=?" and the following
# "?") from a string containing =?<encoding>?[QB]?<string>?=.
#
# Returns nil when the string holds no such sequence (or when the encoding
# part is empty, since that capture group is optional).
def Encodings.split_encoding_from_string(str)
  found = str.match(/\=\?([^?]+)?\?[QB]\?(.+)?\?\=/mi)
  found && found[1]
end
|
266
|
|
267
|
# Determines the character set to label an encoded string with: the
# string's own encoding on Ruby >= 1.9, the global $KCODE on older Rubies.
def Encodings.find_encoding(str)
  if RUBY_VERSION >= '1.9'
    str.encoding
  else
    $KCODE
  end
end
|
270
|
|
271
|
# Gets the encoding type letter (Q or B, matched case-insensitively and
# returned exactly as written) from the first =?<encoding>?[QB]?<string>?=
# sequence in the string. Returns nil when there is no such sequence.
def Encodings.split_value_encoding_from_string(str)
  found = str.match(/\=\?[^?]+?\?([QB])\?(.+)?\?\=/mi)
  found && found[1]
end
|
280
|
|
281
|
# Joins adjacent encoded-words so they can be decoded together.
#
# The string is cut wherever the end of one encoded-word ("?=") is followed,
# optionally after white-space, by the start of the next ("=?"); the
# white-space in between is dropped. The resulting fragments are then
# walked in order and a fragment is appended to the previous entry whenever
# split_value_encoding_from_string reports the same value for it as for the
# fragment before it (this includes both being nil, i.e. neither fragment
# holding a complete encoded-word on its own).
#
# String is expected to contain =?<encoding>?[QB]?<string>?= sequences.
# Returns an Array of Strings.
def Encodings.collapse_adjacent_encodings(str)
  fragments = str.split(/(\?=)\s*(=\?)/).each_slice(2).map(&:join)
  results = []
  previous_encoding = nil

  fragments.each do |line|
    encoding = split_value_encoding_from_string(line)

    # Never merge on the very first fragment: there is nothing to append to
    # yet, and a leading fragment without an encoded-word (encoding nil)
    # would otherwise compare equal to the initial nil and blow up on nil + line.
    if !results.empty? && encoding == previous_encoding
      results[-1] += line
    else
      results << line
    end

    previous_encoding = encoding
  end

  results
end
|
304
|
end
|
305
|
end
|