|
|
@ -5,7 +5,6 @@ import com.fr.stable.StringUtils; |
|
|
|
import com.fr.third.ibm.icu.text.BreakIterator; |
|
|
|
import com.fr.third.ibm.icu.text.BreakIterator; |
|
|
|
|
|
|
|
|
|
|
|
import java.util.ArrayList; |
|
|
|
import java.util.ArrayList; |
|
|
|
import java.util.Iterator; |
|
|
|
|
|
|
|
import java.util.List; |
|
|
|
import java.util.List; |
|
|
|
import java.util.regex.Matcher; |
|
|
|
import java.util.regex.Matcher; |
|
|
|
import java.util.regex.Pattern; |
|
|
|
import java.util.regex.Pattern; |
|
|
@ -72,30 +71,13 @@ public class SegmentationManager { |
|
|
|
int start = itor.first(); |
|
|
|
int start = itor.first(); |
|
|
|
for (int end = itor.next(); end != BreakIterator.DONE; start = end, end = itor.next()) { |
|
|
|
for (int end = itor.next(); end != BreakIterator.DONE; start = end, end = itor.next()) { |
|
|
|
String temp = searchText.substring(start, end); |
|
|
|
String temp = searchText.substring(start, end); |
|
|
|
if (!StringUtils.isEmpty(temp)) { |
|
|
|
//去掉空和连词
|
|
|
|
|
|
|
|
if (!StringUtils.isEmpty(temp) & !AlphaFineConstants.CONJUNCTION.contains(temp)) { |
|
|
|
result.add(temp); |
|
|
|
result.add(temp); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
result = removeConjunction(result); |
|
|
|
|
|
|
|
String[] strings = new String[result.size()]; |
|
|
|
String[] strings = new String[result.size()]; |
|
|
|
result.toArray(strings); |
|
|
|
result.toArray(strings); |
|
|
|
return strings; |
|
|
|
return strings; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
|
|
|
|
* 去除连词 |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* @param result |
|
|
|
|
|
|
|
* @return |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
public static List<String> removeConjunction(List<String> result) { |
|
|
|
|
|
|
|
Iterator<String> it = result.iterator(); |
|
|
|
|
|
|
|
while (it.hasNext()) { |
|
|
|
|
|
|
|
String s = it.next(); |
|
|
|
|
|
|
|
if (AlphaFineConstants.CONJUNCTION.contains(s)) { |
|
|
|
|
|
|
|
it.remove(); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return result; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|