Defect #41464 » fix-guess_encoding.patch
app/models/import.rb | ||
---|---|---|
69 | 69 |
encoding = lu(user, :general_csv_encoding) |
70 | 70 |
if file_exists? |
71 | 71 |
begin |
72 |
content = File.read(filepath, 256)
|
|
72 |
content = File.read(filepath, 4.kilobytes)
|
|
73 | 73 | |
74 | 74 |
separator = [',', ';'].max_by {|sep| content.count(sep)} |
75 | 75 |
wrapper = ['"', "'"].max_by {|quote_char| content.count(quote_char)} |
lib/redmine/codeset_util.rb | ||
---|---|---|
96 | 96 |
return if str.nil? |
97 | 97 | |
98 | 98 |
str = str.dup |
99 |
# Truncate the data at the last LF character to ensure that a partial |
|
100 |
# multibyte character, which could cause `String#valid_encoding?` to |
|
101 |
# return false, is not included at the end of the data. |
|
102 |
last_lf_index = str.rindex("\n") |
|
103 |
str = str[..last_lf_index] if last_lf_index.to_i >= 64 |
|
104 | ||
99 | 105 |
encodings = Setting.repositories_encodings.split(',').collect(&:strip) |
100 | 106 |
encodings = encodings.presence || ['UTF-8'] |
101 | 107 |
test/unit/lib/redmine/codeset_util_test.rb | ||
---|---|---|
118 | 118 |
assert_nil Redmine::CodesetUtil.guess_encoding(str) |
119 | 119 |
end |
120 | 120 |
end |
121 | ||
122 |
def test_guess_encoding_handles_trailing_partial_multibyte_character |
|
123 |
str = <<~STR |
|
124 |
いろはにほへと ちりぬるを |
|
125 |
わかよたれそ つねならむ |
|
126 |
うゐのおくやま けふこえて |
|
127 |
あさきゆめみし ゑひもせす |
|
128 |
色は匂へど 散りぬるを |
|
129 |
我が世誰ぞ 常ならむ |
|
130 |
有為の奥山 今日越えて |
|
131 |
浅き夢見し 酔ひもせず |
|
132 |
STR |
|
133 | ||
134 |
# UTF-8 string truncated in the middle of a multibyte character |
|
135 |
# str.byteslice(0, 256) => "いろは...\n浅き夢見\xE3\x81" |
|
136 |
# "\xE3\x81" is the first two bytes of "し" ("\xE3\x81\x97") |
|
137 |
str_with_partial_char = str.byteslice(0, 256) |
|
138 |
assert_not str_with_partial_char.valid_encoding? |
|
139 |
assert_equal 'UTF-8', Redmine::CodesetUtil.guess_encoding(str_with_partial_char) |
|
140 |
end |
|
121 | 141 |
end |