From 6c1ae01fa4ede47ed534551333373a79f16a9a60 Mon Sep 17 00:00:00 2001
From: xiaoCJ <406612557@qq.com>
Date: Sun, 25 Jun 2023 19:10:19 +0800
Subject: [PATCH] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E5=89=8D=E7=AB=AF=E6=A0=87?=
 =?UTF-8?q?=E7=AD=BE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../business/service/DocInfoService.java      | 97 ++++++++++++++-----
 .../biemo/nlp/SentimentAnalysisUtils.java     | 14 ++-
 2 files changed, 84 insertions(+), 27 deletions(-)
diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
index 8ea237d..04d531c 100644
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
@@ -2,6 +2,8 @@ package com.ruoyi.biemo.business.service;
 
 import com.alibaba.fastjson.JSONObject;
 import com.github.pagehelper.util.StringUtil;
+import com.hankcs.hanlp.classification.classifiers.IClassifier;
+import com.hankcs.hanlp.classification.classifiers.NaiveBayesClassifier;
 import com.hankcs.hanlp.seg.common.Term;
 import com.hankcs.hanlp.tokenizer.NLPTokenizer;
 import com.ruoyi.biemo.business.domain.*;
@@ -14,6 +16,7 @@ import com.ruoyi.biemo.mongodb.utils.MongoHelper;
 import com.ruoyi.biemo.nlp.DependencyParserUtils;
 import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
 import com.ruoyi.biemo.nlp.SummaryUtils;
+import com.ruoyi.biemo.utils.FormatUtil;
 import com.ruoyi.biemo.utils.MyObjects;
 import com.ruoyi.common.core.domain.AjaxResult;
 import com.ruoyi.common.utils.StringUtils;
@@ -29,10 +32,14 @@ import org.springframework.context.event.EventListener;
 import org.springframework.stereotype.Service;
 
 import javax.print.Doc;
+import java.io.IOException;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static com.ruoyi.biemo.nlp.SentimentAnalysisUtils.trainOrLoadModel;
 
 /**
  * @author makesoft
@@ -450,26 +457,37 @@ public class DocInfoService extends EsService<DocInfo> {
 
     public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
         String regex = "<.*?>"; // 匹配HTML标签的正则表达式
+        String regEx = "[\n`~1234567890!@#$%^&*()+=|{}':;',\\[\\].<>/?~！@#￥%……&*（）——+|{}【】‘；：”“’。， 、？\n" +
+                "　　]";
         Map<String, Integer> temp = new ConcurrentHashMap<>();
         List<WordCloudItem> wordCloudItemList = new ArrayList<>();
         DocInfo docInfo = new DocInfo();
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
-        if (CollectionUtils.isNotEmpty(docInfoList)) {
-            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,，　　'“”.。]", "").trim()).stream()).collect(Collectors.toList());
-            if (CollectionUtils.isNotEmpty(termList)) {
-                termList.parallelStream().forEach(term -> {
-                    String word = term.word;
-                    Integer value = term.getFrequency();
-                    if (!temp.containsKey(word)) {
-                        temp.put(word, 1);
-                    } else {
-                        temp.put(word, temp.get(word) + 1);
-                    }
-                });
+        List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
+            try {
+                return NLPTokenizer.
+                        segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())
+                        ).stream();
+            } catch (IOException e) {
+                e.printStackTrace();
+                return Stream.empty();
             }
+        }).collect(Collectors.toList());
+        if (CollectionUtils.isNotEmpty(termList)) {
+            // Tally every token longer than one character into temp (word -> occurrences).
+            // ConcurrentHashMap.merge performs the read-modify-write as one atomic step.
+            // The earlier containsKey/get/put sequence was a non-atomic check-then-act, so
+            // parallel-stream threads handling the same word at the same time could
+            // overwrite each other's increment and under-count that word.
+            // The unused local that held term.getFrequency() has been dropped.
+            termList.parallelStream()
+                    .filter(t -> t.word.length() > 1)
+                    .forEach(term -> temp.merge(term.word, 1, Integer::sum));
         }
-        for (Map.Entry<String, Integer> entry : temp.entrySet()) {
+
+        for (
+                Map.Entry<String, Integer> entry : temp.entrySet()) {
             WordCloudItem wordCloudItem = new WordCloudItem();
             wordCloudItem.setName(entry.getKey());
             wordCloudItem.setValue(entry.getValue());
@@ -478,8 +496,10 @@ public class DocInfoService extends EsService<DocInfo> {
         return wordCloudItemList;
     }
 
-    public EmotionResult getEmotionAnalysis(String categoryId) {
+
+    public EmotionResult getEmotionAnalysis(String categoryId) throws IOException {
         String regex = "<.*?>"; // 匹配HTML标签的正则表达式
+        String regEx = "[\n`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~！@#￥%……&*（）——+|{}【】‘；：”“’。， 、？]";
         Map<String, Integer> temp = new ConcurrentHashMap<>();
 //        List<EmotionResult> emotionResultItemList = new ArrayList<>();
         EmotionResult emotionResult1 = new EmotionResult();
@@ -487,9 +507,13 @@ public class DocInfoService extends EsService<DocInfo> {
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
         if (CollectionUtils.isNotEmpty(docInfoList)) {
-            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,，　　'“”.。]", "").trim()).stream()).collect(Collectors.toList());
+            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty)
+                    .flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "")
+                            .replaceAll("\\s+", "").replaceAll("[,，　　'“”.。]", "").trim()).stream()).collect(Collectors.toList());
+//            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
+//                     NLPTokenizer.segment{_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim()).stream().collect(Collectors.toList());
             if (CollectionUtils.isNotEmpty(termList)) {
-                termList.parallelStream().forEach(term -> {
+                termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
                     String word = term.word;
                     Integer value = term.getFrequency();
                     if (!temp.containsKey(word)) {
@@ -500,25 +524,48 @@ public class DocInfoService extends EsService<DocInfo> {
                 });
             }
         }
+
+
+//        int count = 0;
+//        int count2 = 0;
+//        for (Map.Entry<String, Integer> entry : temp.entrySet()) {
+//            EmotionResult emotionResult = new EmotionResult();
+//            String key = entry.getKey();
+//            String analysis = SentimentAnalysisUtils.analysis(key);
+//            if (analysis.equals("正面")) {
+//                count++;
+//            } else {
+//                count2++;
+//            }
+//            emotionResult.setUpCount(count);
+//            emotionResult.setDownCount(count2);
+//            emotionResult1 = emotionResult;
+//        }
+//        emotionResult1.setDownName("负面");
+//        emotionResult1.setUpName("正面");
+//        return emotionResult1;
+
         int count = 0;
         int count2 = 0;
+        EmotionResult emotionResult = new EmotionResult();
+
+        IClassifier classifier = new NaiveBayesClassifier(trainOrLoadModel());
         for (Map.Entry<String, Integer> entry : temp.entrySet()) {
-            EmotionResult emotionResult = new EmotionResult();
             String key = entry.getKey();
-            String analysis = SentimentAnalysisUtils.analysis(key);
+            String analysis = SentimentAnalysisUtils.analysis1(key, classifier);
             if (analysis.equals("正面")) {
                 count++;
             } else {
                 count2++;
             }
-            emotionResult.setUpCount(count);
-            emotionResult.setDownCount(count2);
-//            emotionResultItemList.add(emotionResult);
-            emotionResult1 = emotionResult;
         }
-        emotionResult1.setDownName("负面");
-        emotionResult1.setUpName("正面");
-        return emotionResult1;
+        emotionResult.setUpCount(count);
+        emotionResult.setDownCount(count2);
+        emotionResult.setUpName("正面");
+        emotionResult.setDownName("负面");
+        return emotionResult;
     }
+
 }
 
+
diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java
index 6465429..c4a8262 100644
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java
@@ -17,7 +17,7 @@ import java.util.List;
  */
 public class SentimentAnalysisUtils {
     public static final String CORPUS_FOLDER = TestUtility.ensureTestData("ChnSentiCorp", "http://hanlp.linrunsoft.com/release/corpus/ChnSentiCorp.zip");
-    public static final String MODEL_PATH = "data/test/sentiment-classification-model.ser";
+    public static final String MODEL_PATH = System.getProperty("sentiment.model.path", "data/test/sentiment-classification-model.ser"); // override with -Dsentiment.model.path=...; no machine-specific absolute path
 
     public static String analysis(String text){
         String result = "";
@@ -30,8 +30,18 @@ public class SentimentAnalysisUtils {
         return result;
     }
 
+    /** Classifies {@code text} with the supplied {@code classifier}; on any exception prints the stack trace and returns "". */
+    public static String analysis1(String text,IClassifier classifier){
+        try {
+            return classifier.classify(text);
+        } catch (Exception classifyFailure) {
+            classifyFailure.printStackTrace();
+            return "";
+        }
+    }
+
     //训练模型
-    private static NaiveBayesModel trainOrLoadModel() throws IOException
+    public static NaiveBayesModel trainOrLoadModel() throws IOException
     {
         NaiveBayesModel model = (NaiveBayesModel) IOUtil.readObjectFrom(MODEL_PATH);
         if (model != null) return model;