Merge remote-tracking branch 'origin/master'

# Conflicts:
#	ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java
master
yz 2 years ago
commit f361a4f0f8

@ -2,6 +2,8 @@ package com.ruoyi.biemo.business.service;
import com.alibaba.fastjson.JSONObject;
import com.github.pagehelper.util.StringUtil;
import com.hankcs.hanlp.classification.classifiers.IClassifier;
import com.hankcs.hanlp.classification.classifiers.NaiveBayesClassifier;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.NLPTokenizer;
import com.ruoyi.biemo.business.domain.*;
@ -14,6 +16,7 @@ import com.ruoyi.biemo.mongodb.utils.MongoHelper;
import com.ruoyi.biemo.nlp.DependencyParserUtils;
import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
import com.ruoyi.biemo.nlp.SummaryUtils;
import com.ruoyi.biemo.utils.FormatUtil;
import com.ruoyi.biemo.utils.MyObjects;
import com.ruoyi.common.core.domain.AjaxResult;
import com.ruoyi.common.utils.StringUtils;
@ -29,10 +32,14 @@ import org.springframework.context.event.EventListener;
import org.springframework.stereotype.Service;
import javax.print.Doc;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static com.ruoyi.biemo.nlp.SentimentAnalysisUtils.trainOrLoadModel;
/**
* @author makesoft
@ -450,26 +457,37 @@ public class DocInfoService extends EsService<DocInfo> {
public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
String regex = "<.*?>"; // 匹配HTML标签的正则表达式
String regEx = "[\n`~1234567890!@#$%^&*()+=|{}':;',\\[\\].<>/?~@#¥%……&*()——+|{}【】‘;:”“’。, 、?\n" +
"  ]";
Map<String, Integer> temp = new ConcurrentHashMap<>();
List<WordCloudItem> wordCloudItemList = new ArrayList<>();
DocInfo docInfo = new DocInfo();
docInfo.setCateId(categoryId);
List<DocInfo> docInfoList = selectDocInfoList(docInfo);
if (CollectionUtils.isNotEmpty(docInfoList)) {
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(termList)) {
termList.parallelStream().forEach(term -> {
String word = term.word;
Integer value = term.getFrequency();
if (!temp.containsKey(word)) {
temp.put(word, 1);
} else {
temp.put(word, temp.get(word) + 1);
}
});
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
try {
return NLPTokenizer.
segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())
).stream();
} catch (IOException e) {
e.printStackTrace();
return Stream.empty();
}
}).collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(termList)) {
termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
String word = term.word;
Integer value = term.getFrequency();
if (!temp.containsKey(word)) {
temp.put(word, 1);
} else {
temp.put(word, temp.get(word) + 1);
}
});
}
for (Map.Entry<String, Integer> entry : temp.entrySet()) {
for (
Map.Entry<String, Integer> entry : temp.entrySet()) {
WordCloudItem wordCloudItem = new WordCloudItem();
wordCloudItem.setName(entry.getKey());
wordCloudItem.setValue(entry.getValue());
@ -478,8 +496,10 @@ public class DocInfoService extends EsService<DocInfo> {
return wordCloudItemList;
}
public EmotionResult getEmotionAnalysis(String categoryId) {
public EmotionResult getEmotionAnalysis(String categoryId) throws IOException {
String regex = "<.*?>"; // 匹配HTML标签的正则表达式
String regEx = "[\n`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~@#¥%……&*()——+|{}【】‘;:”“’。, 、?]";
Map<String, Integer> temp = new ConcurrentHashMap<>();
// List<EmotionResult> emotionResultItemList = new ArrayList<>();
EmotionResult emotionResult1 = new EmotionResult();
@ -487,9 +507,13 @@ public class DocInfoService extends EsService<DocInfo> {
docInfo.setCateId(categoryId);
List<DocInfo> docInfoList = selectDocInfoList(docInfo);
if (CollectionUtils.isNotEmpty(docInfoList)) {
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty)
.flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "")
.replaceAll("\\s+", "").replaceAll("[,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
// List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
// NLPTokenizer.segment{_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim()).stream().collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(termList)) {
termList.parallelStream().forEach(term -> {
termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
String word = term.word;
Integer value = term.getFrequency();
if (!temp.containsKey(word)) {
@ -500,25 +524,48 @@ public class DocInfoService extends EsService<DocInfo> {
});
}
}
// int count = 0;
// int count2 = 0;
// for (Map.Entry<String, Integer> entry : temp.entrySet()) {
// EmotionResult emotionResult = new EmotionResult();
// String key = entry.getKey();
// String analysis = SentimentAnalysisUtils.analysis(key);
// if (analysis.equals("正面")) {
// count++;
// } else {
// count2++;
// }
// emotionResult.setUpCount(count);
// emotionResult.setDownCount(count2);
// emotionResult1 = emotionResult;
// }
// emotionResult1.setDownName("负面");
// emotionResult1.setUpName("正面");
// return emotionResult1;
int count = 0;
int count2 = 0;
EmotionResult emotionResult = new EmotionResult();
IClassifier classifier = new NaiveBayesClassifier(trainOrLoadModel());
for (Map.Entry<String, Integer> entry : temp.entrySet()) {
EmotionResult emotionResult = new EmotionResult();
String key = entry.getKey();
String analysis = SentimentAnalysisUtils.analysis(key);
String analysis = SentimentAnalysisUtils.analysis1(key, classifier);
if (analysis.equals("正面")) {
count++;
} else {
count2++;
}
emotionResult.setUpCount(count);
emotionResult.setDownCount(count2);
// emotionResultItemList.add(emotionResult);
emotionResult1 = emotionResult;
}
emotionResult1.setDownName("负面");
emotionResult1.setUpName("正面");
return emotionResult1;
emotionResult.setUpCount(count);
emotionResult.setDownCount(count2);
emotionResult.setUpName("正面");
emotionResult.setDownName("负面");
return emotionResult;
}
}

@ -16,11 +16,8 @@ import java.util.List;
*
*/
public class SentimentAnalysisUtils {
public static final String CORPUS_FOLDER = TestUtility.ensureTest2Data("ChnSentiCorp", "http://hanlp.linrunsoft.com/release/corpus/ChnSentiCorp.zip");
public static final String MODEL_PATH = "/usr/local/textjar/analysis-hanlp/data/test/sentiment-classification-model.ser";
// public static final String MODEL_PATH = "D:\\tianze\\文本大数据\\analysis-hanlp\\data\\test\\sentiment-classification-model.ser";
public static final String CORPUS_FOLDER = TestUtility.ensureTestData("ChnSentiCorp", "http://hanlp.linrunsoft.com/release/corpus/ChnSentiCorp.zip");
public static final String MODEL_PATH = "D:\\code\\TextBigData\\data\\test\\sentiment-classification-model.ser";
public static String analysis(String text){
String result = "";
@ -33,8 +30,18 @@ public class SentimentAnalysisUtils {
return result;
}
/**
 * Classifies {@code text} with the supplied classifier.
 *
 * <p>Unlike a call that builds its own classifier, this variant reuses the
 * instance handed in by the caller.
 *
 * @param text       the text to classify
 * @param classifier the classifier whose {@code classify} method is invoked
 * @return whatever {@code classifier.classify(text)} returns, or an empty
 *         string if that call throws any {@link Exception} (the stack trace
 *         is printed and the failure is otherwise swallowed)
 */
public static String analysis1(String text, IClassifier classifier) {
    try {
        return classifier.classify(text);
    } catch (Exception e) {
        // Best-effort: report the failure and fall back to the empty label.
        e.printStackTrace();
        return "";
    }
}
// Train the model, or load the serialized copy from MODEL_PATH when one can be read
private static NaiveBayesModel trainOrLoadModel() throws IOException
public static NaiveBayesModel trainOrLoadModel() throws IOException
{
NaiveBayesModel model = (NaiveBayesModel) IOUtil.readObjectFrom(MODEL_PATH);
if (model != null) return model;
@ -108,9 +115,6 @@ public class SentimentAnalysisUtils {
}
/**
 * Ad-hoc entry point: prints {@code MODEL_PATH} between two banner lines,
 * then calls {@code removeSentiments} with a fixed two-element sample array.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String banner = "**********";
    System.out.println(banner);
    System.out.println(MODEL_PATH);
    System.out.println(banner);
    removeSentiments(new String[]{"测试", "中性"});
}

Loading…
Cancel
Save