diff --git a/cli-lanterna/src/main/groovy/com/muwire/clilanterna/SearchModel.groovy b/cli-lanterna/src/main/groovy/com/muwire/clilanterna/SearchModel.groovy
index 9ec69b50..573a82ad 100644
--- a/cli-lanterna/src/main/groovy/com/muwire/clilanterna/SearchModel.groovy
+++ b/cli-lanterna/src/main/groovy/com/muwire/clilanterna/SearchModel.groovy
@@ -49,10 +49,7 @@ class SearchModel {
             searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true)
             payload = root
         } else {
-            def replaced = query.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
-            def terms = replaced.split(" ")
-            def nonEmpty = []
-            terms.each { if (it.length() > 0) nonEmpty << it }
+            def nonEmpty = SplitPattern.termify(query)
             payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8)
             searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true,
                 searchComments : core.muOptions.searchComments, compressedResults : true)
diff --git a/core/src/main/groovy/com/muwire/core/SplitPattern.groovy b/core/src/main/groovy/com/muwire/core/SplitPattern.groovy
index 0908d045..606c903a 100644
--- a/core/src/main/groovy/com/muwire/core/SplitPattern.groovy
+++ b/core/src/main/groovy/com/muwire/core/SplitPattern.groovy
@@ -3,5 +3,110 @@ package com.muwire.core
 class SplitPattern {
 
     public static final String SPLIT_PATTERN = "[\\*\\+\\-,\\.:;\\(\\)=_/\\\\\\!\\\"\\\'\\\$%\\|\\[\\]\\{\\}\\?]";
+
+    // Characters that end a term when they appear outside a quoted phrase.
+    // The double quote is not listed: termify() treats it as a phrase delimiter.
+    private static final Set<Character> SPLIT_CHARS = new HashSet<>()
+    static {
+        SPLIT_CHARS.with {
+            add(' '.toCharacter())
+            add('*'.toCharacter())
+            add('+'.toCharacter())
+            add('-'.toCharacter())
+            add(','.toCharacter())
+            add('.'.toCharacter())
+            add(':'.toCharacter())
+            add(';'.toCharacter())
+            add('('.toCharacter())
+            add(')'.toCharacter())
+            add('='.toCharacter())
+            add('_'.toCharacter())
+            add('/'.toCharacter())
+            add('\\'.toCharacter())
+            add('!'.toCharacter())
+            add('\''.toCharacter())
+            add('$'.toCharacter())
+            add('%'.toCharacter())
+            add('|'.toCharacter())
+            add('['.toCharacter())
+            add(']'.toCharacter())
+            add('{'.toCharacter())
+            add('}'.toCharacter())
+            add('?'.toCharacter())
+        }
+    }
+
+    /**
+     * Breaks a search query into lower-cased terms.
+     *
+     * Text between a pair of double quotes is kept together as one phrase term;
+     * everything else is split on the characters in SPLIT_CHARS.  A double quote
+     * always ends the term in progress.  If the last quote is never closed, it is
+     * treated as a plain separator and the text after it is split into
+     * individual terms.
+     *
+     * @param source the raw query text
+     * @return the non-empty terms, in order of appearance
+     */
+    public static String[] termify(final String source) {
+        String lowercase = source.toLowerCase().trim()
+
+        def rv = []
+        int pos = 0
+        int quote = -1
+
+        StringBuilder tmp = new StringBuilder()
+        while(pos < lowercase.length()) {
+            char c = lowercase.charAt(pos++)
+            if (quote < 0 && c == '"') {
+                // Flush text touching the opening quote so it is neither glued onto
+                // the phrase nor discarded when the quote turns out to be unclosed.
+                if (tmp.length() != 0) {
+                    rv << tmp.toString()
+                    tmp = new StringBuilder()
+                }
+                quote = pos - 1
+                continue
+            }
+            if (quote >= 0) {
+                if (c == '"') {
+                    quote = -1
+                    if (tmp.length() != 0) {
+                        rv << tmp.toString()
+                        tmp = new StringBuilder()
+                    }
+                } else
+                    tmp.append(c)
+            } else if (SPLIT_CHARS.contains(c)) {
+                if (tmp.length() != 0) {
+                    rv << tmp.toString()
+                    tmp = new StringBuilder()
+                }
+            } else
+                tmp.append c
+        }
+
+        // An unclosed quote is still open here: discard the phrase collected so
+        // far and re-split everything after that quote into individual terms.
+        if (quote >= 0) {
+            tmp = new StringBuilder()
+            pos = quote + 1
+            while(pos < lowercase.length()) {
+                char c = lowercase.charAt(pos++)
+                if (SPLIT_CHARS.contains(c)) {
+                    if (tmp.length() > 0) {
+                        rv << tmp.toString()
+                        tmp = new StringBuilder()
+                    }
+                } else
+                    tmp.append(c)
+            }
+        }
+
+        if (tmp.length() > 0)
+            rv << tmp.toString()
+
+        rv
+    }
 
 }
diff --git a/core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy b/core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy
new file mode 100644
index 00000000..10853dcd
--- /dev/null
+++ b/core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy
@@ -0,0 +1,33 @@
+package com.muwire.core
+
+import org.junit.Test
+
+class SplitPatternTest {
+
+    @Test
+    void testReplaceCharacters() {
+        assert SplitPattern.termify("a_b.c") == ['a','b','c']
+    }
+
+    @Test
+    void testPhrase() {
+        assert SplitPattern.termify('"siamese cat"') == ['siamese cat']
+    }
+
+    @Test
+    void testInvalidPhrase() {
+        assert SplitPattern.termify('"siamese cat') == ['siamese', 'cat']
+    }
+
+    @Test
+    void testManyPhrases() {
+        assert SplitPattern.termify('"siamese cat" any cat "persian cat"') ==
+            ['siamese cat','any','cat','persian cat']
+    }
+
+    @Test
+    void testQuoteAdjacentToText() {
+        assert SplitPattern.termify('foo"bar') == ['foo','bar']
+        assert SplitPattern.termify('foo"bar baz"') == ['foo','bar baz']
+    }
+}
diff --git a/gui/griffon-app/controllers/com/muwire/gui/MainFrameController.groovy b/gui/griffon-app/controllers/com/muwire/gui/MainFrameController.groovy
index efa3e771..0b932e1a 100644
--- a/gui/griffon-app/controllers/com/muwire/gui/MainFrameController.groovy
+++ b/gui/griffon-app/controllers/com/muwire/gui/MainFrameController.groovy
@@ -107,11 +107,7 @@ class MainFrameController {
             searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true)
             payload = root
         } else {
-            // this can be improved a lot
-            def replaced = search.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
-            def terms = replaced.split(" ")
-            def nonEmpty = []
-            terms.each { if (it.length() > 0) nonEmpty << it }
+            def nonEmpty = SplitPattern.termify(search)
             payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8)
             searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true,
                 searchComments : core.muOptions.searchComments, compressedResults : true)