diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java index 2fe2937..8138bb3 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java @@ -450,13 +450,14 @@ public class DocInfoService extends EsService { return response; } public List getWordCloudByCateId(String categoryId) { + String regex = "<.*?>"; // 匹配HTML标签的正则表达式 Map temp = new ConcurrentHashMap<>(); List wordCloudItemList = new ArrayList<>(); DocInfo docInfo = new DocInfo(); docInfo.setCateId(categoryId); List docInfoList = selectDocInfoList(docInfo); if(CollectionUtils.isNotEmpty(docInfoList)){ - List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent()).stream()).collect(Collectors.toList()); + List termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+","").replaceAll("[,,  '“”.。]", "").trim()).stream()).collect(Collectors.toList()); if(CollectionUtils.isNotEmpty(termList)){ termList.parallelStream().forEach(term -> { String word = term.word;