From 30eee5ac36d8f728d8850da30b04d033bdf5322e Mon Sep 17 00:00:00 2001
From: xiaoCJ <406612557@qq.com>
Date: Sun, 25 Jun 2023 16:23:57 +0800
Subject: [PATCH 1/3] Sentiment analysis page interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../controller/DocInfoController.java         |  8 ++
 .../biemo/business/domain/EmotionResult.java  | 50 ++++++++++++
 .../business/service/DocInfoService.java      | 79 ++++++++++++++-----
 3 files changed, 119 insertions(+), 18 deletions(-)
 create mode 100644 ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/EmotionResult.java

diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java
index 5511f83..3b53fa9 100644
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java
@@ -11,6 +11,7 @@ import com.hankcs.hanlp.seg.Segment;
 import com.hankcs.hanlp.seg.common.Term;
 import com.ruoyi.biemo.business.domain.Category;
 import com.ruoyi.biemo.business.domain.DocInfo;
+import com.ruoyi.biemo.business.domain.EmotionResult;
 import com.ruoyi.biemo.business.domain.WordCloudItem;
 import com.ruoyi.biemo.business.service.CategoryService;
 import com.ruoyi.biemo.business.service.DocInfoService;
@@ -77,8 +78,15 @@ public class DocInfoController extends BaseController {
     @GetMapping("/getWordCloudByCateId/{categoryId}")
     public AjaxResult getWordCloudByCateId(@PathVariable String categoryId){
         List<WordCloudItem> wordCloudItems = docInfoService.getWordCloudByCateId(categoryId);
+
         return AjaxResult.success(wordCloudItems);
     }
+
+    @GetMapping("/getEmotionAnalysisByCateId/{categoryId}")
+    public AjaxResult getEmotionAnalysis(@PathVariable String categoryId){
+        EmotionResult emotionResult = docInfoService.getEmotionAnalysis(categoryId);
+        return AjaxResult.success(emotionResult);
+    }
     /**
      * Export the article management list
      */
diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/EmotionResult.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/EmotionResult.java
new file mode 100644
index 0000000..8cb3a51
--- /dev/null
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/EmotionResult.java
@@ -0,0 +1,50 @@
+package com.ruoyi.biemo.business.domain;
+
+public class EmotionResult {
+    private String downName;
+    private String upName;
+    private int downCount;
+    private int upCount;
+
+    public EmotionResult(String downName, String upName, int downCount, int upCount) {
+        this.downName = downName;
+        this.upName = upName;
+        this.downCount = downCount;
+        this.upCount = upCount;
+    }
+
+    public String getDownName() {
+        return downName;
+    }
+
+    public String getUpName() {
+        return upName;
+    }
+
+    public int getDownCount() {
+        return downCount;
+    }
+
+    public int getUpCount() {
+        return upCount;
+    }
+
+    public void setDownName(String downName) {
+        this.downName = downName;
+    }
+
+    public void setUpName(String upName) {
+        this.upName = upName;
+    }
+
+    public void setDownCount(int downCount) {
+        this.downCount = downCount;
+    }
+
+    public void setUpCount(int upCount) {
+        this.upCount = upCount;
+    }
+
+    public EmotionResult() {
+    }
+}
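EmotionResult is a plain DTO whose four properties feed the front-end chart. fastjson, which DocInfoService already uses, serializes it through the public getters when AjaxResult.success wraps it. A small usage sketch, not part of the patch; the demo class name is hypothetical:

    package com.ruoyi.biemo.business.domain;

    import com.alibaba.fastjson.JSONObject;

    // Hypothetical demo class: shows the JSON shape the new endpoint returns,
    // e.g. {"downCount":12,"downName":"负面","upCount":34,"upName":"正面"}
    public class EmotionResultDemo {
        public static void main(String[] args) {
            EmotionResult result = new EmotionResult("负面", "正面", 12, 34);
            System.out.println(JSONObject.toJSONString(result));
        }
    }
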
diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
index 8138bb3..8ea237d 100644
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
@@ -4,10 +4,7 @@ import com.alibaba.fastjson.JSONObject;
 import com.github.pagehelper.util.StringUtil;
 import com.hankcs.hanlp.seg.common.Term;
 import com.hankcs.hanlp.tokenizer.NLPTokenizer;
-import com.ruoyi.biemo.business.domain.DocInfo;
-import com.ruoyi.biemo.business.domain.Node;
-import com.ruoyi.biemo.business.domain.Relationship;
-import com.ruoyi.biemo.business.domain.WordCloudItem;
+import com.ruoyi.biemo.business.domain.*;
 import com.ruoyi.biemo.business.domain.event.DocInfoDeleteEvent;
 import com.ruoyi.biemo.business.domain.event.DocInfoSaveEvent;
 import com.ruoyi.biemo.business.response.MyResultResponse;
@@ -15,6 +12,7 @@ import com.ruoyi.biemo.core.page.Page;
 import com.ruoyi.biemo.elasticsearch.util.EsService;
 import com.ruoyi.biemo.mongodb.utils.MongoHelper;
 import com.ruoyi.biemo.nlp.DependencyParserUtils;
+import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
 import com.ruoyi.biemo.nlp.SummaryUtils;
 import com.ruoyi.biemo.utils.MyObjects;
 import com.ruoyi.common.core.domain.AjaxResult;
@@ -238,7 +236,7 @@ public class DocInfoService extends EsService<DocInfo> {
         }
         // extract named entities
         try {
-            Map<String, Set<String>> nerTagSet = DependencyParserUtils.getMyNERTagSet(strArr);
+            Map<String, Set<String>> nerTagSet = DependencyParserUtils.getMyNERTagSet(strArr);
             docInfo.setParserNamedEntity(JSONObject.toJSONString(nerTagSet));
             docInfo.setSummary(SummaryUtils.autoSummary(content));
         } catch (Exception e) {
@@ -344,19 +342,19 @@ public class DocInfoService extends EsService<DocInfo> {
             }
         }
         MyResultResponse myResultResponse = new MyResultResponse();
-        List results = new ArrayList<>();
+        List results = new ArrayList<>();
         List results2 = response2.getResults();
         List results3 = response3.getResults();
-        results.addAll(0,results3);
-        results.addAll(1,results2);
+        results.addAll(0, results3);
+        results.addAll(1, results2);
         List errors = myResultResponse.getErrors();
         errors.addAll(response2.getErrors());
         errors.addAll(response3.getErrors());
         myResultResponse.setResults(results);
         myResultResponse.setErrors(errors);
-        return myResultResponse;
+        return myResultResponse;
     }
 
     // Article management - batch analysis
@@ -449,28 +447,29 @@ public class DocInfoService extends EsService<DocInfo> {
 //        insertOrUpdateDocInfo(docInfo);
         return response;
     }
+
     public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
         String regex = "<.*?>"; // regex that matches HTML tags
-        Map temp = new ConcurrentHashMap<>();
+        Map<String, Integer> temp = new ConcurrentHashMap<>();
         List<WordCloudItem> wordCloudItemList = new ArrayList<>();
         DocInfo docInfo = new DocInfo();
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
-        if(CollectionUtils.isNotEmpty(docInfoList)){
-            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+","").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
-            if(CollectionUtils.isNotEmpty(termList)){
+        if (CollectionUtils.isNotEmpty(docInfoList)) {
+            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
+            if (CollectionUtils.isNotEmpty(termList)) {
                 termList.parallelStream().forEach(term -> {
                     String word = term.word;
                     Integer value = term.getFrequency();
-                    if(!temp.containsKey(word)){
-                        temp.put(word,1);
-                    }else{
-                        temp.put(word,temp.get(word)+1);
+                    if (!temp.containsKey(word)) {
+                        temp.put(word, 1);
+                    } else {
+                        temp.put(word, temp.get(word) + 1);
                     }
                 });
             }
         }
-        for(Map.Entry entry : temp.entrySet()){
+        for (Map.Entry<String, Integer> entry : temp.entrySet()) {
             WordCloudItem wordCloudItem = new WordCloudItem();
             wordCloudItem.setName(entry.getKey());
             wordCloudItem.setValue(entry.getValue());
@@ -478,4 +477,48 @@ public class DocInfoService extends EsService<DocInfo> {
         }
         return wordCloudItemList;
     }
+
+    public EmotionResult getEmotionAnalysis(String categoryId) {
+        String regex = "<.*?>"; // regex that matches HTML tags
+        Map<String, Integer> temp = new ConcurrentHashMap<>();
+//        List<EmotionResult> emotionResultItemList = new ArrayList<>();
+        EmotionResult emotionResult1 = new EmotionResult();
+        DocInfo docInfo = new DocInfo();
+        docInfo.setCateId(categoryId);
+        List<DocInfo> docInfoList = selectDocInfoList(docInfo);
+        if (CollectionUtils.isNotEmpty(docInfoList)) {
+            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
+            if (CollectionUtils.isNotEmpty(termList)) {
+                termList.parallelStream().forEach(term -> {
+                    String word = term.word;
+                    Integer value = term.getFrequency();
+                    if (!temp.containsKey(word)) {
+                        temp.put(word, 1);
+                    } else {
+                        temp.put(word, temp.get(word) + 1);
+                    }
+                });
+            }
+        }
+
+        int count = 0;
+        int count2 = 0;
+        for (Map.Entry<String, Integer> entry : temp.entrySet()) {
+            EmotionResult emotionResult = new EmotionResult();
+            String key = entry.getKey();
+            String analysis = SentimentAnalysisUtils.analysis(key);
+            if (analysis.equals("正面")) {
+                count++;
+            } else {
+                count2++;
+            }
+            emotionResult.setUpCount(count);
+            emotionResult.setDownCount(count2);
+//            emotionResultItemList.add(emotionResult);
+            emotionResult1 = emotionResult;
+        }
+        emotionResult1.setDownName("负面");
+        emotionResult1.setUpName("正面");
+        return emotionResult1;
+    }
 }
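Patch 1's tallying loop allocates a fresh EmotionResult for every distinct word and keeps only the last one; only the final counts actually matter. A minimal sketch of an equivalent aggregation, assuming (as the patch's string comparison implies) that SentimentAnalysisUtils.analysis(String) returns "正面" for positive words; countEmotions is a hypothetical helper name, not part of the patch:

    // Hypothetical helper, not part of the patch: same tally as the
    // entrySet loop in getEmotionAnalysis, but builds a single result
    // object instead of one per map entry.
    private EmotionResult countEmotions(Map<String, Integer> wordCounts) {
        int upCount = 0;
        int downCount = 0;
        for (String word : wordCounts.keySet()) {
            // assumption: analysis() returns "正面" for positive words
            if ("正面".equals(SentimentAnalysisUtils.analysis(word))) {
                upCount++;
            } else {
                downCount++;
            }
        }
        return new EmotionResult("负面", "正面", downCount, upCount);
    }

Note this counts each distinct word once, as the patch does; weighting by the frequencies stored in the map would be a behavioral change.
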
From be3ae0be81d75cc06ea9b40f3ea0c7a7e468a346 Mon Sep 17 00:00:00 2001
From: xiaoCJ <406612557@qq.com>
Date: Sun, 25 Jun 2023 17:17:39 +0800
Subject: [PATCH 2/3] Strip front-end tags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../business/service/DocInfoService.java      | 74 +++++++++++++------
 .../com/ruoyi/biemo/utils/FormatUtil.java     | 59 +++++++++++++++
 2 files changed, 112 insertions(+), 21 deletions(-)
 create mode 100644 ruoyi-biemo/src/main/java/com/ruoyi/biemo/utils/FormatUtil.java

diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
index 8ea237d..a6922a8 100644
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
@@ -14,6 +14,7 @@ import com.ruoyi.biemo.mongodb.utils.MongoHelper;
 import com.ruoyi.biemo.nlp.DependencyParserUtils;
 import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
 import com.ruoyi.biemo.nlp.SummaryUtils;
+import com.ruoyi.biemo.utils.FormatUtil;
 import com.ruoyi.biemo.utils.MyObjects;
 import com.ruoyi.common.core.domain.AjaxResult;
 import com.ruoyi.common.utils.StringUtils;
@@ -29,10 +30,12 @@ import org.springframework.context.event.EventListener;
 import org.springframework.stereotype.Service;
 
 import javax.print.Doc;
+import java.io.IOException;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * @author makesoft
@@ -450,36 +453,53 @@ public class DocInfoService extends EsService<DocInfo> {
 
     public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
         String regex = "<.*?>"; // regex that matches HTML tags
+        String regEx = "[\n`~1234567890!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?\n" +
+                "  ]";
         Map<String, Integer> temp = new ConcurrentHashMap<>();
         List<WordCloudItem> wordCloudItemList = new ArrayList<>();
         DocInfo docInfo = new DocInfo();
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
-        if (CollectionUtils.isNotEmpty(docInfoList)) {
-            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
-            if (CollectionUtils.isNotEmpty(termList)) {
-                termList.parallelStream().forEach(term -> {
-                    String word = term.word;
-                    Integer value = term.getFrequency();
-                    if (!temp.containsKey(word)) {
-                        temp.put(word, 1);
-                    } else {
-                        temp.put(word, temp.get(word) + 1);
-                    }
-                });
+        List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
+            try {
+                return NLPTokenizer.segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())).stream();
+            } catch (IOException e) {
+                e.printStackTrace();
+                return Stream.empty();
             }
+        }).collect(Collectors.toList());
+        if (CollectionUtils.isNotEmpty(termList)) {
+            termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
+                String word = term.word;
+                Integer value = term.getFrequency();
+                if (!temp.containsKey(word)) {
+                    temp.put(word, 1);
+                } else {
+                    temp.put(word, temp.get(word) + 1);
+                }
+            });
         }
         for (Map.Entry<String, Integer> entry : temp.entrySet()) {
             WordCloudItem wordCloudItem = new WordCloudItem();
             wordCloudItem.setName(entry.getKey());
             wordCloudItem.setValue(entry.getValue());
             wordCloudItemList.add(wordCloudItem);
         }
         return wordCloudItemList;
     }
 
     public EmotionResult getEmotionAnalysis(String categoryId) {
         String regex = "<.*?>"; // regex that matches HTML tags
+        String regEx = "[\n`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?]";
         Map<String, Integer> temp = new ConcurrentHashMap<>();
 //        List<EmotionResult> emotionResultItemList = new ArrayList<>();
         EmotionResult emotionResult1 = new EmotionResult();
@@ -487,9 +507,19 @@ public class DocInfoService extends EsService<DocInfo> {
         DocInfo docInfo = new DocInfo();
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
         if (CollectionUtils.isNotEmpty(docInfoList)) {
-            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
+//            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
+            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
+                try {
+                    return NLPTokenizer.segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())).stream();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                    return Stream.empty();
+                }
+            }).collect(Collectors.toList());
             if (CollectionUtils.isNotEmpty(termList)) {
-                termList.parallelStream().forEach(term -> {
+                termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
                     String word = term.word;
                     Integer value = term.getFrequency();
                     if (!temp.containsKey(word)) {
@@ -500,6 +530,8 @@ public class DocInfoService extends EsService<DocInfo> {
                 });
             }
         }
+
+
         int count = 0;
         int count2 = 0;
         for (Map.Entry<String, Integer> entry : temp.entrySet()) {
diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/utils/FormatUtil.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/utils/FormatUtil.java
new file mode 100644
index 0000000..c70c6cd
--- /dev/null
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/utils/FormatUtil.java
@@ -0,0 +1,59 @@
+package com.ruoyi.biemo.utils;
+
+
+import com.hankcs.hanlp.HanLP;
+import com.hankcs.hanlp.seg.common.Term;
+
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class FormatUtil {
+
+    /**
+     * Remove stop words
+     * @param oldString the original Chinese text
+     * @return the Chinese text with stop words removed
+     * @throws IOException
+     */
+    public static String RemovalOfStopWords(String oldString) throws IOException {
+        String newString = oldString;
+
+        // segment the text
+        List<Term> termList = HanLP.segment(newString);
+        System.out.println(termList);
+
+
+        // path of the Chinese stop-word .txt file
+        String filePath = "D:\\停用词.txt";
+        File file = new File(filePath);
+
+        BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
+        List<String> stopWords = new ArrayList<>();
+        String temp = null;
+        while ((temp = bufferedReader.readLine()) != null) {
+            //System.out.println("*" + temp + "*");
+            stopWords.add(temp.trim());
+        }
+
+        List<String> termStringList = new ArrayList<>();
+        for (Term term : termList) {
+            termStringList.add(term.word);
+            //System.out.println("*" + term.word + "*");
+        }
+
+        termStringList.removeAll(stopWords);
+
+        newString = "";
+        for (String string : termStringList) {
+            newString += string;
+        }
+
+        return newString;
+    }
+
+}
\ No newline at end of file
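A side note on the FormatUtil added above: RemovalOfStopWords re-opens and re-reads D:\停用词.txt on every call (and never closes the reader), while the service invokes it once per document from inside a parallel stream. A minimal sketch of the same utility with the stop-word list loaded once and cached; the class name is hypothetical, and it assumes the same file path with UTF-8 content:

    package com.ruoyi.biemo.utils;

    import com.hankcs.hanlp.HanLP;
    import com.hankcs.hanlp.seg.common.Term;

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.HashSet;
    import java.util.Set;

    public final class CachedStopWordFilter {

        // loaded once on first use, then shared by all callers
        private static volatile Set<String> stopWords;

        private static Set<String> stopWords() throws IOException {
            if (stopWords == null) {
                synchronized (CachedStopWordFilter.class) {
                    if (stopWords == null) {
                        Set<String> words = new HashSet<>();
                        for (String line : Files.readAllLines(Paths.get("D:\\停用词.txt"), StandardCharsets.UTF_8)) {
                            words.add(line.trim());
                        }
                        stopWords = words;
                    }
                }
            }
            return stopWords;
        }

        public static String removeStopWords(String text) throws IOException {
            Set<String> stops = stopWords();
            StringBuilder kept = new StringBuilder();
            for (Term term : HanLP.segment(text)) {
                if (!stops.contains(term.word)) {
                    kept.append(term.word);
                }
            }
            return kept.toString();
        }
    }

A HashSet also makes the membership test O(1) per word, where the patch's List.removeAll scans the stop-word list for every term.
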
From 628f94b1e81d005df455a9fb53b5591c6a790c58 Mon Sep 17 00:00:00 2001
From: xiaoCJ <406612557@qq.com>
Date: Sun, 25 Jun 2023 17:17:51 +0800
Subject: [PATCH 3/3] Revert "Strip front-end tags"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit be3ae0be81d75cc06ea9b40f3ea0c7a7e468a346.
---
 .../business/service/DocInfoService.java      | 74 ++++++-------------
 .../com/ruoyi/biemo/utils/FormatUtil.java     | 59 ---------------
 2 files changed, 21 insertions(+), 112 deletions(-)
 delete mode 100644 ruoyi-biemo/src/main/java/com/ruoyi/biemo/utils/FormatUtil.java

diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
index a6922a8..8ea237d 100644
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java
@@ -14,7 +14,6 @@ import com.ruoyi.biemo.mongodb.utils.MongoHelper;
 import com.ruoyi.biemo.nlp.DependencyParserUtils;
 import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
 import com.ruoyi.biemo.nlp.SummaryUtils;
-import com.ruoyi.biemo.utils.FormatUtil;
 import com.ruoyi.biemo.utils.MyObjects;
 import com.ruoyi.common.core.domain.AjaxResult;
 import com.ruoyi.common.utils.StringUtils;
@@ -30,12 +29,10 @@ import org.springframework.context.event.EventListener;
 import org.springframework.stereotype.Service;
 
 import javax.print.Doc;
-import java.io.IOException;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
 
 /**
  * @author makesoft
@@ -453,53 +450,36 @@ public class DocInfoService extends EsService<DocInfo> {
 
     public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
         String regex = "<.*?>"; // regex that matches HTML tags
-        String regEx = "[\n`~1234567890!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?\n" +
-                "  ]";
         Map<String, Integer> temp = new ConcurrentHashMap<>();
         List<WordCloudItem> wordCloudItemList = new ArrayList<>();
         DocInfo docInfo = new DocInfo();
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
-        List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
-            try {
-                return NLPTokenizer.segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())).stream();
-            } catch (IOException e) {
-                e.printStackTrace();
-                return Stream.empty();
+        if (CollectionUtils.isNotEmpty(docInfoList)) {
+            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
+            if (CollectionUtils.isNotEmpty(termList)) {
+                termList.parallelStream().forEach(term -> {
+                    String word = term.word;
+                    Integer value = term.getFrequency();
+                    if (!temp.containsKey(word)) {
+                        temp.put(word, 1);
+                    } else {
+                        temp.put(word, temp.get(word) + 1);
+                    }
+                });
             }
-        }).collect(Collectors.toList());
-        if (CollectionUtils.isNotEmpty(termList)) {
-            termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
-                String word = term.word;
-                Integer value = term.getFrequency();
-                if (!temp.containsKey(word)) {
-                    temp.put(word, 1);
-                } else {
-                    temp.put(word, temp.get(word) + 1);
-                }
-            });
         }
         for (Map.Entry<String, Integer> entry : temp.entrySet()) {
             WordCloudItem wordCloudItem = new WordCloudItem();
             wordCloudItem.setName(entry.getKey());
             wordCloudItem.setValue(entry.getValue());
             wordCloudItemList.add(wordCloudItem);
         }
         return wordCloudItemList;
     }
 
     public EmotionResult getEmotionAnalysis(String categoryId) {
         String regex = "<.*?>"; // regex that matches HTML tags
-        String regEx = "[\n`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。, 、?]";
         Map<String, Integer> temp = new ConcurrentHashMap<>();
 //        List<EmotionResult> emotionResultItemList = new ArrayList<>();
         EmotionResult emotionResult1 = new EmotionResult();
@@ -507,19 +487,9 @@ public class DocInfoService extends EsService<DocInfo> {
         DocInfo docInfo = new DocInfo();
         docInfo.setCateId(categoryId);
         List<DocInfo> docInfoList = selectDocInfoList(docInfo);
         if (CollectionUtils.isNotEmpty(docInfoList)) {
-//            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
-            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> {
-                try {
-                    return NLPTokenizer.segment(FormatUtil.RemovalOfStopWords(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll(regEx, "").trim())).stream();
-                } catch (IOException e) {
-                    e.printStackTrace();
-                    return Stream.empty();
-                }
-            }).collect(Collectors.toList());
+            List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+", "").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
             if (CollectionUtils.isNotEmpty(termList)) {
-                termList.parallelStream().filter(t -> t.word.length() > 1).forEach(term -> {
+                termList.parallelStream().forEach(term -> {
                     String word = term.word;
                     Integer value = term.getFrequency();
                     if (!temp.containsKey(word)) {
@@ -530,8 +500,6 @@ public class DocInfoService extends EsService<DocInfo> {
                 });
             }
         }
-
-
         int count = 0;
         int count2 = 0;
         for (Map.Entry<String, Integer> entry : temp.entrySet()) {
diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/utils/FormatUtil.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/utils/FormatUtil.java
deleted file mode 100644
index c70c6cd..0000000
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/utils/FormatUtil.java
+++ /dev/null
@@ -1,59 +0,0 @@
-package com.ruoyi.biemo.utils;
-
-
-import com.hankcs.hanlp.HanLP;
-import com.hankcs.hanlp.seg.common.Term;
-
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-public class FormatUtil {
-
-    /**
-     * Remove stop words
-     * @param oldString the original Chinese text
-     * @return the Chinese text with stop words removed
-     * @throws IOException
-     */
-    public static String RemovalOfStopWords(String oldString) throws IOException {
-        String newString = oldString;
-
-        // segment the text
-        List<Term> termList = HanLP.segment(newString);
-        System.out.println(termList);
-
-
-        // path of the Chinese stop-word .txt file
-        String filePath = "D:\\停用词.txt";
-        File file = new File(filePath);
-
-        BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
-        List<String> stopWords = new ArrayList<>();
-        String temp = null;
-        while ((temp = bufferedReader.readLine()) != null) {
-            //System.out.println("*" + temp + "*");
-            stopWords.add(temp.trim());
-        }
-
-        List<String> termStringList = new ArrayList<>();
-        for (Term term : termList) {
-            termStringList.add(term.word);
-            //System.out.println("*" + term.word + "*");
-        }
-
-        termStringList.removeAll(stopWords);
-
-        newString = "";
-        for (String string : termStringList) {
-            newString += string;
-        }
-
-        return newString;
-    }
-
-}
\ No newline at end of file
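With the revert applied, the series nets out to patch 1 alone: the sentiment endpoint and EmotionResult stay, the stop-word filtering does not. One wrinkle that survives in both retained methods: the word counts are built from a parallelStream() with a containsKey/put pair, which is not atomic even on a ConcurrentHashMap, so concurrent increments of the same word can be lost. ConcurrentHashMap.merge does the read-modify-write atomically. A self-contained sketch (WordCountDemo is a hypothetical class, not part of the series):

    import java.util.Arrays;
    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    public class WordCountDemo {
        public static void main(String[] args) {
            List<String> words = Arrays.asList("正面", "负面", "正面");
            Map<String, Integer> counts = new ConcurrentHashMap<>();
            // merge() checks and updates in one atomic step, unlike the
            // containsKey/put pair used in getWordCloudByCateId
            words.parallelStream().forEach(w -> counts.merge(w, 1, Integer::sum));
            System.out.println(counts); // e.g. {正面=2, 负面=1}
        }
    }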