@@ -2,6 +2,8 @@ package com.ruoyi.biemo.business.service;
 import com.alibaba.fastjson.JSONObject;
 import com.github.pagehelper.util.StringUtil;
+import com.hankcs.hanlp.classification.classifiers.IClassifier;
+import com.hankcs.hanlp.classification.classifiers.NaiveBayesClassifier;
 import com.hankcs.hanlp.seg.common.Term;
 import com.hankcs.hanlp.tokenizer.NLPTokenizer;
 import com.ruoyi.biemo.business.domain.*;
@@ -14,6 +16,7 @@ import com.ruoyi.biemo.mongodb.utils.MongoHelper;
 import com.ruoyi.biemo.nlp.DependencyParserUtils;
 import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
 import com.ruoyi.biemo.nlp.SummaryUtils;
+import com.ruoyi.biemo.utils.FormatUtil;
 import com.ruoyi.biemo.utils.MyObjects;
 import com.ruoyi.common.core.domain.AjaxResult;
 import com.ruoyi.common.utils.StringUtils;
@@ -29,10 +32,14 @@ import org.springframework.context.event.EventListener;
 import org.springframework.stereotype.Service;

+import javax.print.Doc;
+import java.io.IOException;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;

+import static com.ruoyi.biemo.nlp.SentimentAnalysisUtils.trainOrLoadModel;

 /**
  * @author makesoft
@@ -450,26 +457,37 @@ public class DocInfoService extends EsService<DocInfo> {
     public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
         String regex = "<.*?>"; // regex matching HTML tags
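+        // blacklist of digits plus ASCII and full-width (CJK) punctuation, stripped before segmentation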
+        String regEx = "[\n`~1234567890!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?\n" +
+                " ]";
         Map<String, Integer> temp = new ConcurrentHashMap<>();
         List<WordCloudItem> wordCloudItemList = new ArrayList<>();
         DocInfo docInfo = new DocInfo();
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
         if (CollectionUtils.isNotEmpty(docInfoList)) {
-            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,, '“”.。]", "").trim()).stream()).collect(Collectors.toList());
-            if (CollectionUtils.isNotEmpty(termList)) {
-                termList.parallelStream().forEach(term -> {
-                    String word = term.word;
-                    Integer value = term.getFrequency();
-                    if (!temp.containsKey(word)) {
-                        temp.put(word, 1);
-                    } else {
-                        temp.put(word, temp.get(word) + 1);
-                    }
-                });
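+            // per document: strip HTML tags, whitespace and punctuation, drop stop words, then segment with HanLP's NLPTokenizer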
+            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
+                try {
+                    return NLPTokenizer.segment(
+                            FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())
+                    ).stream();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                    return Stream.empty();
+                }
+            }).collect(Collectors.toList());
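+            // tally one hit per occurrence (term.getFrequency() is assigned but ignored); single-character tokens are skipped as noise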
+            if (CollectionUtils.isNotEmpty(termList)) {
+                termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
+                    String word = term.word;
+                    Integer value = term.getFrequency();
+                    if (!temp.containsKey(word)) {
+                        temp.put(word, 1);
+                    } else {
+                        temp.put(word, temp.get(word) + 1);
+                    }
+                });
+            }
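+            // convert the accumulated frequency map into word-cloud items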
             for (Map.Entry<String, Integer> entry : temp.entrySet()) {
                 WordCloudItem wordCloudItem = new WordCloudItem();
                 wordCloudItem.setName(entry.getKey());
                 wordCloudItem.setValue(entry.getValue());
@@ -478,8 +496,10 @@ public class DocInfoService extends EsService<DocInfo> {
         return wordCloudItemList;
     }

-    public EmotionResult getEmotionAnalysis(String categoryId) {
+    public EmotionResult getEmotionAnalysis(String categoryId) throws IOException {
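+        // the new throws clause covers the file I/O of building the sentiment model below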
         String regex = "<.*?>"; // regex matching HTML tags
         String regEx = "[\n`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?]";
         Map<String, Integer> temp = new ConcurrentHashMap<>();
         // List<EmotionResult> emotionResultItemList = new ArrayList<>();
         EmotionResult emotionResult1 = new EmotionResult();
@@ -487,9 +507,13 @@ public class DocInfoService extends EsService<DocInfo> {
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
         if (CollectionUtils.isNotEmpty(docInfoList)) {
-            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,, '“”.。]", "").trim()).stream()).collect(Collectors.toList());
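+            // same HTML/whitespace/punctuation cleanup as the word cloud, but without stop-word removal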
+            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty)
+                    .flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "")
+                            .replaceAll("\\s+", "").replaceAll("[,, '“”.。]", "").trim()).stream()).collect(Collectors.toList());
+            // List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
+            //     NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim()).stream().collect(Collectors.toList());
             if (CollectionUtils.isNotEmpty(termList)) {
-                termList.parallelStream().forEach(term -> {
+                termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
                     String word = term.word;
                     Integer value = term.getFrequency();
                     if (!temp.containsKey(word)) {
@@ -500,25 +524,48 @@ public class DocInfoService extends EsService<DocInfo> {
                 });
             }
         }

+        // int count = 0;
+        // int count2 = 0;
+        // for (Map.Entry<String, Integer> entry : temp.entrySet()) {
+        //     EmotionResult emotionResult = new EmotionResult();
+        //     String key = entry.getKey();
+        //     String analysis = SentimentAnalysisUtils.analysis(key);
+        //     if (analysis.equals("正面")) {
+        //         count++;
+        //     } else {
+        //         count2++;
+        //     }
+        //     emotionResult.setUpCount(count);
+        //     emotionResult.setDownCount(count2);
+        //     emotionResult1 = emotionResult;
+        // }
+        // emotionResult1.setDownName("负面");
+        // emotionResult1.setUpName("正面");
+        // return emotionResult1;

         int count = 0;
         int count2 = 0;
+        EmotionResult emotionResult = new EmotionResult();
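+        // build the Naive Bayes classifier once, outside the loop; analysis1 reuses it for
+        // every word while tallying 正面 (positive) vs 负面 (negative) classifications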
+        IClassifier classifier = new NaiveBayesClassifier(trainOrLoadModel());
         for (Map.Entry<String, Integer> entry : temp.entrySet()) {
-            EmotionResult emotionResult = new EmotionResult();
             String key = entry.getKey();
-            String analysis = SentimentAnalysisUtils.analysis(key);
+            String analysis = SentimentAnalysisUtils.analysis1(key, classifier);
             if (analysis.equals("正面")) {
                 count++;
             } else {
                 count2++;
             }
-            emotionResult.setUpCount(count);
-            emotionResult.setDownCount(count2);
-            // emotionResultItemList.add(emotionResult);
-            emotionResult1 = emotionResult;
         }
-        emotionResult1.setDownName("负面");
-        emotionResult1.setUpName("正面");
-        return emotionResult1;
+        emotionResult.setUpCount(count);
+        emotionResult.setDownCount(count2);
+        emotionResult.setUpName("正面");
+        emotionResult.setDownName("负面");
+        return emotionResult;
     }
 }