|
|
|
@ -450,13 +450,14 @@ public class DocInfoService extends EsService<DocInfo> {
|
|
|
|
|
return response;
|
|
|
|
|
}
|
|
|
|
|
public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
|
|
|
|
|
String regex = "<.*?>"; // 匹配HTML标签的正则表达式
|
|
|
|
|
Map<String,Integer> temp = new ConcurrentHashMap<>();
|
|
|
|
|
List<WordCloudItem> wordCloudItemList = new ArrayList<>();
|
|
|
|
|
DocInfo docInfo = new DocInfo();
|
|
|
|
|
docInfo.setCateId(categoryId);
|
|
|
|
|
List<DocInfo> docInfoList = selectDocInfoList(docInfo);
|
|
|
|
|
if(CollectionUtils.isNotEmpty(docInfoList)){
|
|
|
|
|
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent()).stream()).collect(Collectors.toList());
|
|
|
|
|
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+","").replaceAll("[,, '“”.。]", "").trim()).stream()).collect(Collectors.toList());
|
|
|
|
|
if(CollectionUtils.isNotEmpty(termList)){
|
|
|
|
|
termList.parallelStream().forEach(term -> {
|
|
|
|
|
String word = term.word;
|
|
|
|
|