Feature #37878 » support-ideographic-space-separator.patch
--- a/lib/redmine/search.rb
+++ b/lib/redmine/search.rb
@@ -135,7 +135,7 @@
       def tokens
         # extract tokens from the question
         # eg. hello "bye bye" => ["hello", "bye bye"]
-        tokens = @question.scan(%r{((\s|^)"[^"]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')}
+        tokens = @question.scan(%r{(([[:space:]]|^)"[^"]+"([[:space:]]|$)|[[:^space:]]+)}).collect {|m| m.first.gsub(%r{(^[[:space:]]*"[[:space:]]*|[[:space:]]*"[[:space:]]*$)}, '')}
         # tokens must be at least 2 characters long
         # but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
         # no more than 5 tokens to search for
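The substance of the change is the switch from the shorthand classes `\s` / `\S` to the POSIX bracket expressions `[[:space:]]` / `[[:^space:]]`. In Ruby regexps, `\s` matches only ASCII whitespace, while `[[:space:]]` also matches Unicode whitespace such as U+3000 IDEOGRAPHIC SPACE. The snippet below is a minimal illustration of that difference, not part of the patch:

```ruby
# Ruby: \s covers only ASCII whitespace; the POSIX class [[:space:]]
# also covers Unicode whitespace such as U+3000 IDEOGRAPHIC SPACE.
question = "全角\u3000スペース"   # two words separated by an ideographic space

question.split(/\s+/)          #=> ["全角　スペース"]  (U+3000 is not treated as a separator)
question.split(/[[:space:]]+/) #=> ["全角", "スペース"]
```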
--- a/test/unit/lib/redmine/search_test.rb
+++ b/test/unit/lib/redmine/search_test.rb
@@ -24,4 +24,9 @@
     value = "hello \"bye bye\""
     assert_equal ["hello", "bye bye"], Redmine::Search::Tokenizer.new(value).tokens
   end
+
+  def test_tokenize_should_consider_ideographic_space_as_separator
+    value = "全角　スペース" # The space character is U+3000, not U+0020
+    assert_equal ["全角", "スペース"], Redmine::Search::Tokenizer.new(value).tokens
+  end
 end
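As asserted by the new test, the expected behaviour after the patch is that the ideographic space acts as a token separator:

```ruby
# Expected tokenizer behaviour with the patch applied,
# mirroring the assertion in the new test case:
Redmine::Search::Tokenizer.new("全角\u3000スペース").tokens
#=> ["全角", "スペース"]
```

The new test can be run with Redmine's standard test runner, e.g. `bundle exec rails test test/unit/lib/redmine/search_test.rb` on a recent Redmine checkout (the exact invocation depends on the Redmine/Rails version in use).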