From 6c1ae01fa4ede47ed534551333373a79f16a9a60 Mon Sep 17 00:00:00 2001 From: xiaoCJ <406612557@qq.com> Date: Sun, 25 Jun 2023 19:10:19 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E5=89=8D=E7=AB=AF=E6=A0=87?= =?UTF-8?q?=E7=AD=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../business/service/DocInfoService.java | 97 ++++++++++++++----- .../biemo/nlp/SentimentAnalysisUtils.java | 14 ++- 2 files changed, 84 insertions(+), 27 deletions(-) diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java index 8ea237d..04d531c 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java @@ -2,6 +2,8 @@ package com.ruoyi.biemo.business.service; import com.alibaba.fastjson.JSONObject; import com.github.pagehelper.util.StringUtil; +import com.hankcs.hanlp.classification.classifiers.IClassifier; +import com.hankcs.hanlp.classification.classifiers.NaiveBayesClassifier; import com.hankcs.hanlp.seg.common.Term; import com.hankcs.hanlp.tokenizer.NLPTokenizer; import com.ruoyi.biemo.business.domain.*; @@ -14,6 +16,7 @@ import com.ruoyi.biemo.mongodb.utils.MongoHelper; import com.ruoyi.biemo.nlp.DependencyParserUtils; import com.ruoyi.biemo.nlp.SentimentAnalysisUtils; import com.ruoyi.biemo.nlp.SummaryUtils; +import com.ruoyi.biemo.utils.FormatUtil; import com.ruoyi.biemo.utils.MyObjects; import com.ruoyi.common.core.domain.AjaxResult; import com.ruoyi.common.utils.StringUtils; @@ -29,10 +32,14 @@ import org.springframework.context.event.EventListener; import org.springframework.stereotype.Service; import javax.print.Doc; +import java.io.IOException; import java.util.*; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static com.ruoyi.biemo.nlp.SentimentAnalysisUtils.trainOrLoadModel; /** * @author makesoft @@ -450,26 +457,37 @@ public class DocInfoService extends EsService { public List getWordCloudByCateId(String categoryId) { String regex = "<.*?>"; // 匹配HTML标签的正则表达式 + String regEx = "[\n`~1234567890!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?\n" + + "  ]"; Map temp = new ConcurrentHashMap<>(); List wordCloudItemList = new ArrayList<>(); DocInfo docInfo = new DocInfo(); docInfo.setCateId(categoryId); List docInfoList = selectDocInfoList(docInfo); - if (CollectionUtils.isNotEmpty(docInfoList)) { - List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList()); - if (CollectionUtils.isNotEmpty(termList)) { - termList.parallelStream().forEach(term -> { - String word = term.word; - Integer value = term.getFrequency(); - if (!temp.containsKey(word)) { - temp.put(word, 1); - } else { - temp.put(word, temp.get(word) + 1); - } - }); + List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> { + try { + return NLPTokenizer. + segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim()) + ).stream(); + } catch (IOException e) { + e.printStackTrace(); + return Stream.empty(); } + }).collect(Collectors.toList()); + if (CollectionUtils.isNotEmpty(termList)) { + termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> { + String word = term.word; + Integer value = term.getFrequency(); + if (!temp.containsKey(word)) { + temp.put(word, 1); + } else { + temp.put(word, temp.get(word) + 1); + } + }); } - for (Map.Entry entry : temp.entrySet()) { + + for ( + Map.Entry entry : temp.entrySet()) { WordCloudItem wordCloudItem = new WordCloudItem(); wordCloudItem.setName(entry.getKey()); wordCloudItem.setValue(entry.getValue()); @@ -478,8 +496,10 @@ public class DocInfoService extends EsService { return wordCloudItemList; } - public EmotionResult getEmotionAnalysis(String categoryId) { + + public EmotionResult getEmotionAnalysis(String categoryId) throws IOException { String regex = "<.*?>"; // 匹配HTML标签的正则表达式 + String regEx = "[\n`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?]"; Map temp = new ConcurrentHashMap<>(); // List emotionResultItemList = new ArrayList<>(); EmotionResult emotionResult1 = new EmotionResult(); @@ -487,9 +507,13 @@ public class DocInfoService extends EsService { docInfo.setCateId(categoryId); List docInfoList = selectDocInfoList(docInfo); if (CollectionUtils.isNotEmpty(docInfoList)) { - List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList()); + List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty) + .flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "") + .replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList()); +// List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> { +// NLPTokenizer.segment{_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim()).stream().collect(Collectors.toList()); if (CollectionUtils.isNotEmpty(termList)) { - termList.parallelStream().forEach(term -> { + termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> { String word = term.word; Integer value = term.getFrequency(); if (!temp.containsKey(word)) { @@ -500,25 +524,48 @@ public class DocInfoService extends EsService { }); } } + + +// int count = 0; +// int count2 = 0; +// for (Map.Entry entry : temp.entrySet()) { +// EmotionResult emotionResult = new EmotionResult(); +// String key = entry.getKey(); +// String analysis = SentimentAnalysisUtils.analysis(key); +// if (analysis.equals("正面")) { +// count++; +// } else { +// count2++; +// } +// emotionResult.setUpCount(count); +// emotionResult.setDownCount(count2); +// emotionResult1 = emotionResult; +// } +// emotionResult1.setDownName("负面"); +// emotionResult1.setUpName("正面"); +// return emotionResult1; + int count = 0; int count2 = 0; + EmotionResult emotionResult = new EmotionResult(); + + IClassifier classifier = new NaiveBayesClassifier(trainOrLoadModel()); for (Map.Entry entry : temp.entrySet()) { - EmotionResult emotionResult = new EmotionResult(); String key = entry.getKey(); - String analysis = SentimentAnalysisUtils.analysis(key); + String analysis = SentimentAnalysisUtils.analysis1(key, classifier); if (analysis.equals("正面")) { count++; } else { count2++; } - emotionResult.setUpCount(count); - emotionResult.setDownCount(count2); -// emotionResultItemList.add(emotionResult); - emotionResult1 = emotionResult; } - emotionResult1.setDownName("负面"); - emotionResult1.setUpName("正面"); - return emotionResult1; + emotionResult.setUpCount(count); + emotionResult.setDownCount(count2); + emotionResult.setUpName("正面"); + emotionResult.setDownName("负面"); + return emotionResult; } + } + diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java index 6465429..c4a8262 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java @@ -17,7 +17,7 @@ import java.util.List; */ public class SentimentAnalysisUtils { public static final String CORPUS_FOLDER = TestUtility.ensureTestData("ChnSentiCorp", "http://hanlp.linrunsoft.com/release/corpus/ChnSentiCorp.zip"); - public static final String MODEL_PATH = "data/test/sentiment-classification-model.ser"; + public static final String MODEL_PATH = "D:\\code\\TextBigData\\data\\test\\sentiment-classification-model.ser"; public static String analysis(String text){ String result = ""; @@ -30,8 +30,18 @@ public class SentimentAnalysisUtils { return result; } + public static String analysis1(String text,IClassifier classifier){ + String result = ""; + try { + result = classifier.classify(text); + } catch (Exception ioException) { + ioException.printStackTrace(); + } + return result; + } + //训练模型 - private static NaiveBayesModel trainOrLoadModel() throws IOException + public static NaiveBayesModel trainOrLoadModel() throws IOException { NaiveBayesModel model = (NaiveBayesModel) IOUtil.readObjectFrom(MODEL_PATH); if (model != null) return model;