去除前端标签

master
xiaoCJ 2 years ago
parent 1abc0f4b17
commit ff834b3310

@@ -450,13 +450,14 @@ public class DocInfoService extends EsService<DocInfo> {
return response;
}
public List<WordCloudItem> getWordCloudByCateId(String categoryId) {
String regex = "<.*?>"; // 匹配HTML标签的正则表达式
Map<String,Integer> temp = new ConcurrentHashMap<>();
List<WordCloudItem> wordCloudItemList = new ArrayList<>();
DocInfo docInfo = new DocInfo();
docInfo.setCateId(categoryId);
List<DocInfo> docInfoList = selectDocInfoList(docInfo);
if(CollectionUtils.isNotEmpty(docInfoList)){
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent()).stream()).collect(Collectors.toList());
List<Term> termList = docInfoList.parallelStream().filter(ObjectUtils::isNotEmpty).flatMap(_docInfo -> NLPTokenizer.segment(_docInfo.getContent().replaceAll(regex, "").replaceAll("\\s+","").replaceAll("[,  '“”.。]", "").trim()).stream()).collect(Collectors.toList());
if(CollectionUtils.isNotEmpty(termList)){
termList.parallelStream().forEach(term -> {
String word = term.word;

Loading…
Cancel
Save