support phrases in search
This commit is contained in:
@@ -49,10 +49,7 @@ class SearchModel {
|
|||||||
searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true)
|
searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true)
|
||||||
payload = root
|
payload = root
|
||||||
} else {
|
} else {
|
||||||
def replaced = query.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
|
def nonEmpty = SplitPattern.termify(query)
|
||||||
def terms = replaced.split(" ")
|
|
||||||
def nonEmpty = []
|
|
||||||
terms.each { if (it.length() > 0) nonEmpty << it }
|
|
||||||
payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8)
|
payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8)
|
||||||
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true,
|
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true,
|
||||||
searchComments : core.muOptions.searchComments, compressedResults : true)
|
searchComments : core.muOptions.searchComments, compressedResults : true)
|
||||||
|
@@ -3,5 +3,89 @@ package com.muwire.core
|
|||||||
/**
 * Tokenization rules for search queries.
 *
 * {@link #SPLIT_PATTERN} is the regular-expression form of the separator set;
 * {@link #termify(String)} is a hand-written tokenizer that additionally
 * understands double-quoted phrases.
 */
class SplitPattern {

    public static final String SPLIT_PATTERN = "[\\*\\+\\-,\\.:;\\(\\)=_/\\\\\\!\\\"\\\'\\\$%\\|\\[\\]\\{\\}\\?]";

    /**
     * Characters that separate terms outside of a quoted phrase.
     * The double quote is intentionally absent: termify() handles it itself.
     */
    private static final Set<Character> SPLIT_CHARS = new HashSet<>()
    static {
        // space plus the punctuation separators, one entry per character
        for (char c : ' *+-,.:;()=_/\\!\'$%|[]{}?'.toCharArray()) {
            SPLIT_CHARS.add(c)
        }
    }

    /**
     * Splits a query into lower-cased search terms.
     *
     * Text enclosed in a pair of double quotes is kept as a single term
     * (a phrase), including any separator characters inside it.  Everything
     * else is split on the characters in SPLIT_CHARS.  If the last quote has
     * no closing partner, the text after it is tokenized as ordinary
     * unquoted text.
     *
     * A quote always terminates the term being built, so text directly
     * adjacent to a quote is emitted as its own term instead of being merged
     * into the phrase or dropped when the quote turns out to be unbalanced.
     *
     * @param source the raw query; null yields an empty array
     * @return the non-empty terms in order of appearance
     */
    public static String[] termify(final String source) {
        if (source == null)
            return new String[0]

        String lowercase = source.toLowerCase().trim()

        List<String> rv = []
        int pos = 0
        int quote = -1 // index of the currently open quote, -1 when not inside a phrase

        StringBuilder tmp = new StringBuilder()
        while (pos < lowercase.length()) {
            char c = lowercase.charAt(pos++)
            if (c == '"') {
                // a quote ends whatever was being accumulated, then toggles phrase mode
                flushTerm(rv, tmp)
                quote = quote < 0 ? pos - 1 : -1
                continue
            }
            if (quote >= 0)
                tmp.append(c)
            else if (SPLIT_CHARS.contains(c))
                flushTerm(rv, tmp)
            else
                tmp.append(c)
        }

        // odd number of quotes: discard the half-built phrase and
        // re-tokenize everything after the last quote as plain text
        if (quote >= 0) {
            tmp.setLength(0)
            pos = quote + 1
            while (pos < lowercase.length()) {
                char c = lowercase.charAt(pos++)
                if (SPLIT_CHARS.contains(c))
                    flushTerm(rv, tmp)
                else
                    tmp.append(c)
            }
        }

        flushTerm(rv, tmp)

        rv.toArray(new String[0])
    }

    /** Appends the pending term to terms if it is non-empty, then clears the buffer. */
    private static void flushTerm(List<String> terms, StringBuilder pending) {
        if (pending.length() > 0) {
            terms << pending.toString()
            pending.setLength(0)
        }
    }

}
|
||||||
|
27
core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy
Normal file
27
core/src/test/groovy/com/muwire/core/SplitPatternTest.groovy
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
package com.muwire.core
|
||||||
|
|
||||||
|
import org.junit.Test
|
||||||
|
|
||||||
|
/** Unit tests for SplitPattern.termify. */
class SplitPatternTest {

    /** '_' and '.' act as separators between terms. */
    @Test
    void testReplaceCharacters() {
        def terms = SplitPattern.termify("a_b.c")
        assert terms == ['a','b','c']
    }

    /** A fully quoted query comes back as one phrase term. */
    @Test
    void testPhrase() {
        def terms = SplitPattern.termify('"siamese cat"')
        assert terms == ['siamese cat']
    }

    /** An unterminated quote falls back to ordinary word splitting. */
    @Test
    void testInvalidPhrase() {
        def terms = SplitPattern.termify('"siamese cat')
        assert terms == ['siamese', 'cat']
    }

    /** Quoted phrases and plain words can be mixed in one query. */
    @Test
    void testManyPhrases() {
        def expected = ['siamese cat','any','cat','persian cat']
        def terms = SplitPattern.termify('"siamese cat" any cat "persian cat"')
        assert terms == expected
    }
}
|
@@ -107,11 +107,7 @@ class MainFrameController {
|
|||||||
searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true)
|
searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true)
|
||||||
payload = root
|
payload = root
|
||||||
} else {
|
} else {
|
||||||
// this can be improved a lot
|
def nonEmpty = SplitPattern.termify(search)
|
||||||
def replaced = search.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
|
|
||||||
def terms = replaced.split(" ")
|
|
||||||
def nonEmpty = []
|
|
||||||
terms.each { if (it.length() > 0) nonEmpty << it }
|
|
||||||
payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8)
|
payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8)
|
||||||
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true,
|
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true,
|
||||||
searchComments : core.muOptions.searchComments, compressedResults : true)
|
searchComments : core.muOptions.searchComments, compressedResults : true)
|
||||||
|
Reference in New Issue
Block a user