support phrases in search
This commit is contained in:
@@ -49,10 +49,7 @@ class SearchModel {
|
||||
searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true)
|
||||
payload = root
|
||||
} else {
|
||||
def replaced = query.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
|
||||
def terms = replaced.split(" ")
|
||||
def nonEmpty = []
|
||||
terms.each { if (it.length() > 0) nonEmpty << it }
|
||||
def nonEmpty = SplitPattern.termify(query)
|
||||
payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8)
|
||||
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true,
|
||||
searchComments : core.muOptions.searchComments, compressedResults : true)
|
||||
|
@@ -4,4 +4,88 @@ class SplitPattern {
|
||||
|
||||
// Regular expression (a single character class) that matches one punctuation character.
// Note that it contains the double quote but not the space character.
public static final String SPLIT_PATTERN = "[\\*\\+\\-,\\.:;\\(\\)=_/\\\\\\!\\\"\\\'\\\$%\\|\\[\\]\\{\\}\\?]";
|
||||
|
||||
// Characters that end the current term when termify meets them outside a quoted phrase.
// The double quote is not a member of this set.
private static final Set<Character> SPLIT_CHARS = new HashSet<>()
static {
    // Every character of this literal becomes one entry of the set:
    // space * + - , . : ; ( ) = _ / \ ! ' $ % | [ ] { } ?
    for (char separator : ' *+-,.:;()=_/\\!\'$%|[]{}?'.toCharArray()) {
        SPLIT_CHARS.add(separator)
    }
}
|
||||
|
||||
/**
 * Splits a search query into lower-case terms.
 * <p>
 * Text enclosed in a pair of double quotes is kept together as one phrase term,
 * including any separator characters inside it. Outside of quotes every character
 * contained in SPLIT_CHARS ends the current term, and so does an opening quote.
 * If the final double quote has no closing partner, the text after it is
 * tokenized as ordinary unquoted terms.
 *
 * @param source the raw query text; may be null
 * @return the non-empty terms in order of appearance; an empty array when
 *         source is null or contains no terms
 */
public static String[] termify(final String source) {
    if (source == null)
        return new String[0]

    String lowercase = source.toLowerCase().trim()

    def rv = []
    int pos = 0
    // index of the currently open double quote, or -1 when not inside a phrase
    int quote = -1

    StringBuilder tmp = new StringBuilder()
    while(pos < lowercase.length()) {
        char c = lowercase.charAt(pos++)
        if (quote < 0 && c == '"') {
            // An opening quote also terminates the term in front of it. Without this
            // flush, abc"def ghi" produced the merged term "abcdef ghi", and the
            // unterminated abc"def lost "abc" when the tail was re-tokenized below.
            if (tmp.length() != 0) {
                rv << tmp.toString()
                tmp = new StringBuilder()
            }
            quote = pos - 1
            continue
        }
        if (quote >= 0) {
            if (c == '"') {
                // closing quote: emit the collected phrase as a single term
                quote = -1
                if (tmp.length() != 0) {
                    rv << tmp.toString()
                    tmp = new StringBuilder()
                }
            } else
                tmp.append(c) // inside a phrase separators are kept verbatim
        } else if (SPLIT_CHARS.contains(c)) {
            if (tmp.length() != 0) {
                rv << tmp.toString()
                tmp = new StringBuilder()
            }
        } else
            tmp.append(c)
    }

    // check if odd number of quotes and re-tokenize from last quote
    if (quote >= 0) {
        // tmp only holds the unterminated phrase at this point; drop it and
        // split the same text on separators instead
        tmp = new StringBuilder()
        pos = quote + 1
        while(pos < lowercase.length()) {
            char c = lowercase.charAt(pos++)
            if (SPLIT_CHARS.contains(c)) {
                if (tmp.length() > 0) {
                    rv << tmp.toString()
                    tmp = new StringBuilder()
                }
            } else
                tmp.append(c)
        }
    }

    // emit the trailing term, if any
    if (tmp.length() > 0)
        rv << tmp.toString()

    rv as String[]
}
|
||||
|
||||
}
|
||||
|
27
core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy
Normal file
27
core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy
Normal file
@@ -0,0 +1,27 @@
|
||||
package com.muwire.core
|
||||
|
||||
import org.junit.Test
|
||||
|
||||
/**
 * Tests for SplitPattern.termify: splitting on separator characters and
 * handling of double-quoted phrases.
 */
class SplitPatternTest {

    /** Separator characters between letters produce one term per letter group. */
    @Test
    void testReplaceCharacters() {
        def terms = SplitPattern.termify("a_b.c")
        assert terms == ['a','b','c']
    }

    /** A fully quoted phrase is returned as a single term, space included. */
    @Test
    void testPhrase() {
        def terms = SplitPattern.termify('"siamese cat"')
        assert terms == ['siamese cat']
    }

    /** A quote without a closing partner falls back to plain term splitting. */
    @Test
    void testInvalidPhrase() {
        def terms = SplitPattern.termify('"siamese cat')
        assert terms == ['siamese', 'cat']
    }

    /** Several phrases mixed with plain terms keep their order of appearance. */
    @Test
    void testManyPhrases() {
        def expected = ['siamese cat','any','cat','persian cat']
        def terms = SplitPattern.termify('"siamese cat" any cat "persian cat"')
        assert terms == expected
    }
}
|
@@ -107,11 +107,7 @@ class MainFrameController {
|
||||
searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true)
|
||||
payload = root
|
||||
} else {
|
||||
// this can be improved a lot
|
||||
def replaced = search.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
|
||||
def terms = replaced.split(" ")
|
||||
def nonEmpty = []
|
||||
terms.each { if (it.length() > 0) nonEmpty << it }
|
||||
def nonEmpty = SplitPattern.termify(search)
|
||||
payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8)
|
||||
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true,
|
||||
searchComments : core.muOptions.searchComments, compressedResults : true)
|
||||
|
Reference in New Issue
Block a user