|
|
|
@ -14,6 +14,7 @@ import com.ruoyi.biemo.mongodb.utils.MongoHelper;
|
|
|
|
|
import com.ruoyi.biemo.nlp.DependencyParserUtils;
|
|
|
|
|
import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
|
|
|
|
|
import com.ruoyi.biemo.nlp.SummaryUtils;
|
|
|
|
|
import com.ruoyi.biemo.utils.FormatUtil;
|
|
|
|
|
import com.ruoyi.biemo.utils.MyObjects;
|
|
|
|
|
import com.ruoyi.common.core.domain.AjaxResult;
|
|
|
|
|
import com.ruoyi.common.utils.StringUtils;
|
|
|
|
@ -29,10 +30,12 @@ import org.springframework.context.event.EventListener;
|
|
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
|
|
|
|
import javax.print.Doc;
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.util.*;
|
|
|
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
|
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
import java.util.stream.Stream;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @author makesoft
|
|
|
|
@ -450,36 +453,53 @@ public class DocInfoService extends EsService<DocInfo> {
|
|
|
|
|
|
|
|
|
|
public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
|
|
|
|
|
String regex = "<.*?>"; // 匹配HTML标签的正则表达式
|
|
|
|
|
String regEx="[\n`~1234567890!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?\n" +
|
|
|
|
|
" ]";
|
|
|
|
|
Map<String, Integer> temp = new ConcurrentHashMap<>();
|
|
|
|
|
List<WordCloudItem> wordCloudItemList = new ArrayList<>();
|
|
|
|
|
DocInfo docInfo = new DocInfo();
|
|
|
|
|
docInfo.setCateId(categoryId);
|
|
|
|
|
List<DocInfo> docInfoList = selectDocInfoList(docInfo);
|
|
|
|
|
if (CollectionUtils.isNotEmpty(docInfoList)) {
|
|
|
|
|
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,, '“”.。]", "").trim()).stream()).collect(Collectors.toList());
|
|
|
|
|
if (CollectionUtils.isNotEmpty(termList)) {
|
|
|
|
|
termList.parallelStream().forEach(term -> {
|
|
|
|
|
String word = term.word;
|
|
|
|
|
Integer value = term.getFrequency();
|
|
|
|
|
if (!temp.containsKey(word)) {
|
|
|
|
|
temp.put(word, 1);
|
|
|
|
|
} else {
|
|
|
|
|
temp.put(word, temp.get(word) + 1);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
|
|
|
|
|
try {
|
|
|
|
|
return NLPTokenizer.
|
|
|
|
|
segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())
|
|
|
|
|
).stream();
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
return Stream.empty();
|
|
|
|
|
}
|
|
|
|
|
}).collect(Collectors.toList());
|
|
|
|
|
if (CollectionUtils.isNotEmpty(termList)) {
|
|
|
|
|
termList.parallelStream().filter(t->t.word.length()>1).forEach(term -> {
|
|
|
|
|
String word = term.word;
|
|
|
|
|
Integer value = term.getFrequency();
|
|
|
|
|
if (!temp.containsKey(word)) {
|
|
|
|
|
temp.put(word, 1);
|
|
|
|
|
} else {
|
|
|
|
|
temp.put(word, temp.get(word) + 1);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
for (Map.Entry<String, Integer> entry : temp.entrySet()) {
|
|
|
|
|
WordCloudItem wordCloudItem = new WordCloudItem();
|
|
|
|
|
wordCloudItem.setName(entry.getKey());
|
|
|
|
|
wordCloudItem.setValue(entry.getValue());
|
|
|
|
|
wordCloudItemList.add(wordCloudItem);
|
|
|
|
|
}
|
|
|
|
|
return wordCloudItemList;
|
|
|
|
|
|
|
|
|
|
for(
|
|
|
|
|
Map.Entry<String, Integer> entry :temp.entrySet())
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
|
WordCloudItem wordCloudItem = new WordCloudItem();
|
|
|
|
|
wordCloudItem.setName(entry.getKey());
|
|
|
|
|
wordCloudItem.setValue(entry.getValue());
|
|
|
|
|
wordCloudItemList.add(wordCloudItem);
|
|
|
|
|
}
|
|
|
|
|
return wordCloudItemList;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public EmotionResult getEmotionAnalysis(String categoryId) {
|
|
|
|
|
String regex = "<.*?>"; // 匹配HTML标签的正则表达式
|
|
|
|
|
String regEx="[\n`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?]";
|
|
|
|
|
Map<String, Integer> temp = new ConcurrentHashMap<>();
|
|
|
|
|
// List<EmotionResult> emotionResultItemList = new ArrayList<>();
|
|
|
|
|
EmotionResult emotionResult1 = new EmotionResult();
|
|
|
|
@ -487,9 +507,19 @@ public class DocInfoService extends EsService<DocInfo> {
|
|
|
|
|
docInfo.setCateId(categoryId);
|
|
|
|
|
List<DocInfo> docInfoList = selectDocInfoList(docInfo);
|
|
|
|
|
if (CollectionUtils.isNotEmpty(docInfoList)) {
|
|
|
|
|
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,, '“”.。]", "").trim()).stream()).collect(Collectors.toList());
|
|
|
|
|
// List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,, '“”.。]", "").trim()).stream()).collect(Collectors.toList());
|
|
|
|
|
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
|
|
|
|
|
try {
|
|
|
|
|
return NLPTokenizer.
|
|
|
|
|
segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())
|
|
|
|
|
).stream();
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
return Stream.empty();
|
|
|
|
|
}
|
|
|
|
|
}).collect(Collectors.toList());
|
|
|
|
|
if (CollectionUtils.isNotEmpty(termList)) {
|
|
|
|
|
termList.parallelStream().forEach(term -> {
|
|
|
|
|
termList.parallelStream().filter(t->t.word.length()>1).forEach(term -> {
|
|
|
|
|
String word = term.word;
|
|
|
|
|
Integer value = term.getFrequency();
|
|
|
|
|
if (!temp.containsKey(word)) {
|
|
|
|
@ -500,6 +530,8 @@ public class DocInfoService extends EsService<DocInfo> {
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int count = 0;
|
|
|
|
|
int count2 = 0;
|
|
|
|
|
for (Map.Entry<String, Integer> entry : temp.entrySet()) {
|
|
|
|
|