support phrases in search

This commit is contained in:
Zlatin Balevsky
2019-11-05 15:52:23 +00:00
parent b865376d24
commit 9feb891c51
4 changed files with 113 additions and 9 deletions

View File

@@ -49,10 +49,7 @@ class SearchModel {
searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true) searchEvent = new SearchEvent(searchHash : root, uuid : UUID.randomUUID(), oobInfohash : true, compressedResults : true)
payload = root payload = root
} else { } else {
def replaced = query.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ") def nonEmpty = SplitPattern.termify(query)
def terms = replaced.split(" ")
def nonEmpty = []
terms.each { if (it.length() > 0) nonEmpty << it }
payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8) payload = String.join(" ", nonEmpty).getBytes(StandardCharsets.UTF_8)
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true, searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : UUID.randomUUID(), oobInfohash: true,
searchComments : core.muOptions.searchComments, compressedResults : true) searchComments : core.muOptions.searchComments, compressedResults : true)

View File

@@ -3,5 +3,89 @@ package com.muwire.core
class SplitPattern { class SplitPattern {
public static final String SPLIT_PATTERN = "[\\*\\+\\-,\\.:;\\(\\)=_/\\\\\\!\\\"\\\'\\\$%\\|\\[\\]\\{\\}\\?]"; public static final String SPLIT_PATTERN = "[\\*\\+\\-,\\.:;\\(\\)=_/\\\\\\!\\\"\\\'\\\$%\\|\\[\\]\\{\\}\\?]";
// Characters that terminate a term when they appear outside of a quoted phrase.
// Unlike SPLIT_PATTERN, this set has a space and has no double quote; termify
// checks for the double quote itself before consulting this set.
private static final Set<Character> SPLIT_CHARS = new HashSet<>()
static {
    // One literal holding every separator: space * + - , . : ; ( ) = _ / \ ! ' $ % | [ ] { } ?
    for (char separator : ' *+-,.:;()=_/\\!\'$%|[]{}?'.toCharArray()) {
        SPLIT_CHARS.add(separator)
    }
}
/**
 * Breaks a search query into lowercase terms.
 *
 * The query is lower-cased and trimmed, then scanned character by character:
 * <ul>
 *   <li>outside of quotes, any character in SPLIT_CHARS ends the current term;</li>
 *   <li>text between a pair of double quotes becomes a single term (a phrase),
 *       kept verbatim including any split characters; empty phrases are dropped;</li>
 *   <li>a double quote that is never closed is ignored, and the text following
 *       it is tokenized as ordinary terms.</li>
 * </ul>
 * An opening quote also ends whatever term was being built, so
 * <code>foo"bar baz"</code> yields <code>foo</code> and <code>bar baz</code>.
 *
 * @param source the raw query; must not be null
 * @return the terms in the order they appear; empty if the query has none
 */
public static String[] termify(final String source) {
    String lowercase = source.toLowerCase().trim()
    def rv = []
    int pos = 0
    // index of the currently open double quote, or -1 when not inside a phrase
    int quote = -1
    StringBuilder tmp = new StringBuilder()
    while (pos < lowercase.length()) {
        char c = lowercase.charAt(pos++)
        if (quote < 0 && c == '"') {
            // Flush the term in progress before the phrase starts. Without this
            // the term would be glued onto the phrase, or discarded entirely if
            // the quote turns out to be unbalanced.
            flushTerm(tmp, rv)
            quote = pos - 1
            continue
        }
        if (quote >= 0) {
            if (c == '"') {
                // closing quote: everything collected since the opening quote is one term
                quote = -1
                flushTerm(tmp, rv)
            } else {
                tmp.append(c)
            }
        } else if (SPLIT_CHARS.contains(c)) {
            flushTerm(tmp, rv)
        } else {
            tmp.append(c)
        }
    }

    // Odd number of quotes: discard the partial phrase and re-tokenize the text
    // after the last (unmatched) quote as ordinary terms.
    if (quote >= 0) {
        tmp.setLength(0)
        pos = quote + 1
        while (pos < lowercase.length()) {
            char c = lowercase.charAt(pos++)
            if (SPLIT_CHARS.contains(c)) {
                flushTerm(tmp, rv)
            } else {
                tmp.append(c)
            }
        }
    }
    flushTerm(tmp, rv)
    rv
}

/** Appends the buffered term to terms (if non-empty) and clears the buffer. */
private static void flushTerm(StringBuilder term, List terms) {
    if (term.length() > 0) {
        terms << term.toString()
        term.setLength(0)
    }
}
} }

View File

@@ -0,0 +1,27 @@
package com.muwire.core
import org.junit.Test
class SplitPatternTest {

    /** Split characters between letters separate them into individual terms. */
    @Test
    void testReplaceCharacters() {
        def terms = SplitPattern.termify("a_b.c")
        assert terms == ['a','b','c']
    }

    /** A balanced pair of double quotes produces one term containing the space. */
    @Test
    void testPhrase() {
        def terms = SplitPattern.termify('"siamese cat"')
        assert terms == ['siamese cat']
    }

    /** A quote that is never closed is ignored and the words are split normally. */
    @Test
    void testInvalidPhrase() {
        def terms = SplitPattern.termify('"siamese cat')
        assert terms == ['siamese', 'cat']
    }

    /** Several phrases and plain words can be mixed in one query. */
    @Test
    void testManyPhrases() {
        def query = '"siamese cat" any cat "persian cat"'
        def terms = SplitPattern.termify(query)
        assert terms == ['siamese cat','any','cat','persian cat']
    }
}

View File

@@ -107,11 +107,7 @@ class MainFrameController {
searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true) searchEvent = new SearchEvent(searchHash : root, uuid : uuid, oobInfohash: true, compressedResults : true)
payload = root payload = root
} else { } else {
// this can be improved a lot def nonEmpty = SplitPattern.termify(search)
def replaced = search.toLowerCase().trim().replaceAll(SplitPattern.SPLIT_PATTERN, " ")
def terms = replaced.split(" ")
def nonEmpty = []
terms.each { if (it.length() > 0) nonEmpty << it }
payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8) payload = String.join(" ",nonEmpty).getBytes(StandardCharsets.UTF_8)
searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true, searchEvent = new SearchEvent(searchTerms : nonEmpty, uuid : uuid, oobInfohash: true,
searchComments : core.muOptions.searchComments, compressedResults : true) searchComments : core.muOptions.searchComments, compressedResults : true)