diff --git a/app/models/import.rb b/app/models/import.rb index 94e44c5e2..ab76e1b91 100644 --- a/app/models/import.rb +++ b/app/models/import.rb @@ -69,7 +69,7 @@ class Import < ApplicationRecord encoding = lu(user, :general_csv_encoding) if file_exists? begin - content = File.read(filepath, 256) + content = File.read(filepath, 4.kilobytes) separator = [',', ';'].max_by {|sep| content.count(sep)} wrapper = ['"', "'"].max_by {|quote_char| content.count(quote_char)} diff --git a/lib/redmine/codeset_util.rb b/lib/redmine/codeset_util.rb index aa0a23334..0d6594733 100644 --- a/lib/redmine/codeset_util.rb +++ b/lib/redmine/codeset_util.rb @@ -96,6 +96,12 @@ module Redmine return if str.nil? str = str.dup + # Truncate the data at the last LF character to ensure that a partial + # multibyte character, which could cause `String#valid_encoding?` to + # return false, is not included at the end of the data. + last_lf_index = str.rindex("\n") + str = str[..last_lf_index] if last_lf_index.to_i >= 64 + encodings = Setting.repositories_encodings.split(',').collect(&:strip) encodings = encodings.presence || ['UTF-8'] diff --git a/test/unit/lib/redmine/codeset_util_test.rb b/test/unit/lib/redmine/codeset_util_test.rb index 39a619d4c..d71f70908 100644 --- a/test/unit/lib/redmine/codeset_util_test.rb +++ b/test/unit/lib/redmine/codeset_util_test.rb @@ -118,4 +118,24 @@ class Redmine::CodesetUtilTest < ActiveSupport::TestCase assert_nil Redmine::CodesetUtil.guess_encoding(str) end end + + def test_guess_encoding_handles_trailing_partial_multibyte_character + str = <<~STR + いろはにほへと ちりぬるを + わかよたれそ つねならむ + うゐのおくやま けふこえて + あさきゆめみし ゑひもせす + 色は匂へど 散りぬるを + 我が世誰ぞ 常ならむ + 有為の奥山 今日越えて + 浅き夢見し 酔ひもせず + STR + + # UTF-8 string truncated at an incomplete character boundary + # str.byteslice(0, 256) => "いろは...\n浅き夢見\xE3\x81" + # "\xE3\x81" is a part of "し" ("\xE3\x81\x97") + str_with_partial_char = str.byteslice(0, 256) + assert_not str_with_partial_char.valid_encoding? + assert_equal 'UTF-8', Redmine::CodesetUtil.guess_encoding(str_with_partial_char) + end end