From ff834b33106fec4d38ebce16103910870693c02e Mon Sep 17 00:00:00 2001 From: xiaoCJ <406612557@qq.com> Date: Sun, 25 Jun 2023 11:15:14 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8E=BB=E9=99=A4=E5=89=8D=E7=AB=AF=E6=A0=87?= =?UTF-8?q?=E7=AD=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/ruoyi/biemo/business/service/DocInfoService.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java index 2fe2937..8138bb3 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java @@ -450,13 +450,14 @@ public class DocInfoService extends EsService { return response; } public List getWordCloudByCateId(String categoryId) { + String regex = "<.*?>"; // 匹配HTML标签的正则表达式 Map temp = new ConcurrentHashMap<>(); List wordCloudItemList = new ArrayList<>(); DocInfo docInfo = new DocInfo(); docInfo.setCateId(categoryId); List docInfoList = selectDocInfoList(docInfo); if(CollectionUtils.isNotEmpty(docInfoList)){ - List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent()).stream()).collect(Collectors.toList()); + List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+","").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList()); if(CollectionUtils.isNotEmpty(termList)){ termList.parallelStream().forEach(term -> { String word = term.word;