Feature #37878 » support-ideographic-space-separator.patch
--- a/lib/redmine/search.rb
+++ b/lib/redmine/search.rb
@@ -135,7 +135,7 @@
       def tokens
         # extract tokens from the question
         # eg. hello "bye bye" => ["hello", "bye bye"]
-        tokens = @question.scan(%r{((\s|^)"[^"]+"(\s|$)|\S+)}).collect {|m| m.first.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '')}
+        tokens = @question.scan(%r{(([[:space:]]|^)"[^"]+"([[:space:]]|$)|[[:^space:]]+)}).collect {|m| m.first.gsub(%r{(^[[:space:]]*"[[:space:]]*|[[:space:]]*"[[:space:]]*$)}, '')}
         # tokens must be at least 2 characters long
         # but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
         # no more than 5 tokens to search for
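The substance of the change is the switch from the shorthand classes `\s` / `\S` to the POSIX bracket expressions `[[:space:]]` / `[[:^space:]]`. In Ruby regexps, `\s` matches only ASCII whitespace, while `[[:space:]]` also matches Unicode whitespace such as U+3000 IDEOGRAPHIC SPACE. The snippet below is a minimal illustration of that difference, not part of the patch:

```ruby
# Ruby: \s covers only ASCII whitespace; the POSIX class [[:space:]]
# also covers Unicode whitespace such as U+3000 IDEOGRAPHIC SPACE.
question = "全角\u3000スペース"   # two words separated by an ideographic space

question.split(/\s+/)          #=> ["全角　スペース"]  (U+3000 is not treated as a separator)
question.split(/[[:space:]]+/) #=> ["全角", "スペース"]
```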
--- a/test/unit/lib/redmine/search_test.rb
+++ b/test/unit/lib/redmine/search_test.rb
@@ -24,4 +24,9 @@
     value = "hello \"bye bye\""
     assert_equal ["hello", "bye bye"], Redmine::Search::Tokenizer.new(value).tokens
   end
+
+  def test_tokenize_should_consider_ideographic_space_as_separator
+    value = "全角　スペース" # The space character is U+3000, not U+0020
+    assert_equal ["全角", "スペース"], Redmine::Search::Tokenizer.new(value).tokens
+  end
 end
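As asserted by the new test, the expected behaviour after the patch is that the ideographic space acts as a token separator:

```ruby
# Expected tokenizer behaviour with the patch applied,
# mirroring the assertion in the new test case:
Redmine::Search::Tokenizer.new("全角\u3000スペース").tokens
#=> ["全角", "スペース"]
```

The new test can be run with Redmine's standard test runner, e.g. `bundle exec rails test test/unit/lib/redmine/search_test.rb` on a recent Redmine checkout (the exact invocation depends on the Redmine/Rails version in use).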