Project

General

Profile

Defect #41464 » fix-guess_encoding.patch

Go MAEDA, 2024-10-10 09:28

View differences:

app/models/import.rb
69 69
    encoding = lu(user, :general_csv_encoding)
70 70
    if file_exists?
71 71
      begin
72
        content = File.read(filepath, 256)
72
        content = File.read(filepath, 4.kilobytes)
73 73

  
74 74
        separator = [',', ';'].max_by {|sep| content.count(sep)}
75 75
        wrapper = ['"', "'"].max_by {|quote_char| content.count(quote_char)}
lib/redmine/codeset_util.rb
96 96
      return if str.nil?
97 97

  
98 98
      str = str.dup
99
      # Truncate the data at the last LF character to ensure that a partial
100
      # multibyte character, which could cause `String#valid_encoding?` to
101
      # return false, is not included at the end of the data.
102
      last_lf_index = str.rindex("\n")
103
      str = str[..last_lf_index] if last_lf_index.to_i >= 64
104

  
99 105
      encodings = Setting.repositories_encodings.split(',').collect(&:strip)
100 106
      encodings = encodings.presence || ['UTF-8']
101 107

  
test/unit/lib/redmine/codeset_util_test.rb
118 118
      assert_nil Redmine::CodesetUtil.guess_encoding(str)
119 119
    end
120 120
  end
121

  
122
  def test_guess_encoding_handles_trailing_partial_multibyte_character
123
    str = <<~STR
124
      いろはにほへと ちりぬるを
125
      わかよたれそ つねならむ
126
      うゐのおくやま けふこえて
127
      あさきゆめみし ゑひもせす
128
      色は匂へど 散りぬるを
129
      我が世誰ぞ 常ならむ
130
      有為の奥山 今日越えて
131
      浅き夢見し 酔ひもせず
132
    STR
133

  
134
    # UTF-8 string truncated in the middle of a multibyte character
135
    # str.byteslice(0, 256) => "いろは...\n浅き夢見\xE3\x81"
136
    # "\xE3\x81" are the first two bytes of the three-byte character "し" ("\xE3\x81\x97")
137
    str_with_partial_char = str.byteslice(0, 256)
138
    assert_not str_with_partial_char.valid_encoding?
139
    assert_equal 'UTF-8', Redmine::CodesetUtil.guess_encoding(str_with_partial_char)
140
  end
121 141
end
(1-1/2)