From 58a473a8780d7f0771cfdef5ec6520ddbc8bbbbe Mon Sep 17 00:00:00 2001
From: yz <3614508250@qq.com>
Date: Fri, 2 Jun 2023 09:15:45 +0800
Subject: [PATCH] =?UTF-8?q?=E8=AF=8D=E6=80=A7=E6=A0=87=E6=B3=A8=E4=BF=AE?=
 =?UTF-8?q?=E6=94=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../controller/DocInfoController.java         | 130 +++++++++++++-----
 .../ruoyi/biemo/business/domain/DocInfo.java  |   4 +-
 2 files changed, 99 insertions(+), 35 deletions(-)

diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java
index 33501cf..d9bbf67 100644
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java
@@ -2,9 +2,13 @@ package com.ruoyi.biemo.business.controller;
 
 import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
+import com.hankcs.hanlp.HanLP;
 import com.hankcs.hanlp.corpus.document.sentence.Sentence;
 import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
 import com.hankcs.hanlp.corpus.document.sentence.word.Word;
+import com.hankcs.hanlp.dictionary.CustomDictionary;
+import com.hankcs.hanlp.seg.Segment;
+import com.hankcs.hanlp.seg.common.Term;
 import com.ruoyi.biemo.business.domain.Category;
 import com.ruoyi.biemo.business.domain.DocInfo;
 import com.ruoyi.biemo.mongodb.entity.SplitWordDrill;
@@ -13,6 +17,7 @@ import com.ruoyi.biemo.business.service.DocInfoService;
 import com.ruoyi.biemo.core.page.Page;
 import com.ruoyi.biemo.core.page.PageFactory;
 import com.ruoyi.biemo.nlp.DependencyParserUtils;
+import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
 import com.ruoyi.biemo.nlp.TextClassificationUtils;
 import com.ruoyi.biemo.utils.MyObjects;
 import com.ruoyi.common.annotation.Log;
@@ -20,6 +25,7 @@ import com.ruoyi.common.core.controller.BaseController;
 import com.ruoyi.common.core.domain.AjaxResult;
 import com.ruoyi.common.enums.BusinessType;
 import com.ruoyi.common.utils.poi.ExcelUtil;
+import org.apache.commons.lang3.StringUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.data.mongodb.core.MongoTemplate;
 import org.springframework.data.mongodb.core.query.Criteria;
@@ -29,9 +35,7 @@ import org.springframework.web.bind.annotation.*;
 
 import javax.servlet.http.HttpServletResponse;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.List;
+import java.util.*;
 
 
 /**
@@ -302,47 +306,107 @@ public class DocInfoController extends BaseController {
 //        docInfoService.insertEs(docInfo);
 //    }
 
-    @PostMapping ("/splitWord")
-    public AjaxResult splitWord(@RequestBody DocInfo docInfo){
-        List<String> strings = docInfoService.analyzeTitle(docInfo);
-        docInfo.setAnalyzeTitle(strings);
-        return AjaxResult.success("分词成功",docInfo);
+    @PostMapping ("/splitWordStr")
+    public AjaxResult splitWord(@RequestBody String str){
+        Segment segment= HanLP.newSegment();
+        List<Term> termList = segment.seg(str);
+        List<String> wordList=new ArrayList<>();
+        for (Term term : termList) {
+            String word = term.toString().substring(0, term.length());
+            wordList.add(word);
+        }
+        return AjaxResult.success("分词成功",wordList);
     }
 
+    @PostMapping ("/splitWordDocInfo")
+    public AjaxResult splitWordNew(@RequestBody DocInfo docInfo){
+        String title = docInfo.getTitle();
+        Segment segment= HanLP.newSegment();
+        List<Term> termList = segment.seg(title);
+        List<String> wordList=new ArrayList<>();
+        //遍历分词结果
+        for (Term term : termList) {
+            String word = term.toString().substring(0, term.length());
+            wordList.add(word);
+        }
+        docInfo.setAnalyzeTitle(wordList);
+        return AjaxResult.success("分词成功",docInfo);
+    }
     /**
      * 分词结果上传
      *
-     * @param docInfo
+     * @param splitResult
      * @return
      */
-    @PostMapping("/uploadSplitWord")
-    public AjaxResult uploadsplitWord(@RequestBody DocInfo docInfo) {
-        Query query = new Query(Criteria.where("_id").is(docInfo.getId()));
-        Update update = new Update();
-        update.set("splitWordResult", docInfo.getSplitWordResult());
-        mongoTemplate.upsert(query, update, DocInfo.class);
-        return AjaxResult.success("上传成功");
+    @PostMapping("/uploadSplitWordStr")
+    public AjaxResult uploadSplitWordNew(@RequestBody String splitResult) {
+        try {
+            String[] split = splitResult.split("/");
+            for (int i = 0; i < split.length; i++) {
+                if (StringUtils.isNotBlank(split[i])) {
+                    CustomDictionary.add(split[i]);
+                }
+            }
+            return AjaxResult.success("分词上传成功");
+        }catch (Exception e){
+            return AjaxResult.error("格式不正确");
+        }
     }
 
-    /**
-     * 分词词训练结果上传
-     *
-     * @param splitWordDrill
-     * @return
-     */
-    @PostMapping("/uploadSplitWordDrill")
-    public AjaxResult uploadSplitWordDrill(@RequestBody SplitWordDrill splitWordDrill) {
-        for (String data : splitWordDrill.getSplitResult()) {
-            mongoTemplate.updateFirst(
-                    Query.query(Criteria.where("_id").is(splitWordDrill.getId())), // 查询条件
-                    new Update()
-                            .push("splitResult").each(data)
-                            .currentDate("updateTime"),
-//                            .currentTimestamp("updateTime"),
-                    "splitWordDrill");
+    @PostMapping("/uploadSplitWordDocInfo")
+    public AjaxResult uploadSplitWordNew(@RequestBody DocInfo docInfo) {
+        try {
+            List<String> analyzeTitleList = docInfo.getAnalyzeTitle();
+            for (int i = 0; i < analyzeTitleList.size(); i++) {
+                if(StringUtils.isNotBlank(analyzeTitleList.get(i))){
+                    CustomDictionary.add(analyzeTitleList.get(i));
+                }
+            }
+            Query query = new Query(Criteria.where("_id").is(docInfo.getId()));
+            Update update = new Update();
+            update.set("splitWordStatus", "已分词");
+            mongoTemplate.upsert(query, update, DocInfo.class);
+            return AjaxResult.success("分词上传成功");
+        }catch (Exception e){
+            return AjaxResult.error("格式不正确");
         }
-        return AjaxResult.success("上传成功");
     }
+
+//    /**
+//     * 分词结果上传
+//     *
+//     * @param docInfo
+//     * @return
+//     */
+//    @PostMapping("/uploadSplitWord")
+//    public AjaxResult uploadsplitWord(@RequestBody DocInfo docInfo) {
+//        Query query = new Query(Criteria.where("_id").is(docInfo.getId()));
+//        Update update = new Update();
+//        update.set("splitWordResult", docInfo.getSplitWordResult());
+//        mongoTemplate.upsert(query, update, DocInfo.class);
+//        return AjaxResult.success("上传成功");
+//    }
+//
+//    /**
+//     * 分词词训练结果上传
+//     *
+//     * @param splitWordDrill
+//     * @return
+//     */
+//    @PostMapping("/uploadSplitWordDrill")
+//    public AjaxResult uploadSplitWordDrill(@RequestBody SplitWordDrill splitWordDrill) {
+//        for (String data : splitWordDrill.getSplitResult()) {
+//            mongoTemplate.updateFirst(
+//                    Query.query(Criteria.where("_id").is(splitWordDrill.getId())), // 查询条件
+//                    new Update()
+//                            .push("splitResult").each(data)
+//                            .currentDate("updateTime"),
+////                            .currentTimestamp("updateTime"),
+//                    "splitWordDrill");
+//        }
+//        return AjaxResult.success("上传成功");
+//    }
+
 }
diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/DocInfo.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/DocInfo.java
index cd93863..7e8e8a6 100644
--- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/DocInfo.java
+++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/DocInfo.java
@@ -63,7 +63,7 @@ public class DocInfo extends BiemoEntity {
     //分词数据
     private List<String> analyzeTitle;
 
-    //分词结果
-    private String splitWordResult;
+    //分词状态 已分词 和未分词
+    private String splitWordStatus;
 
 }
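
Note: the patch above replaces the DocInfo-only /splitWord and /uploadSplitWord endpoints with string- and document-oriented variants that call HanLP directly. /splitWordStr and /splitWordDocInfo segment text with HanLP.newSegment().seg(...) and keep only the surface word of each Term; /uploadSplitWordStr and /uploadSplitWordDocInfo register the confirmed tokens via CustomDictionary.add(...) and mark the document's splitWordStatus as "已分词" ("segmented"). The snippet below is a minimal standalone sketch of that HanLP flow, not part of the patch; it assumes the com.hankcs:hanlp artifact is on the classpath, and the class name HanlpSplitSketch and the sample strings are illustrative only.

    import com.hankcs.hanlp.HanLP;
    import com.hankcs.hanlp.dictionary.CustomDictionary;
    import com.hankcs.hanlp.seg.Segment;
    import com.hankcs.hanlp.seg.common.Term;

    import java.util.ArrayList;
    import java.util.List;

    // Illustrative sketch only; class name and sample words are made up.
    public class HanlpSplitSketch {
        public static void main(String[] args) {
            // What /uploadSplitWordStr does for each non-blank token of its
            // "/"-separated payload: register the word in the custom dictionary.
            CustomDictionary.add("词性标注");   // "part-of-speech tagging"

            // What /splitWordStr and /splitWordDocInfo do: segment the input text.
            Segment segment = HanLP.newSegment();
            List<Term> termList = segment.seg("词性标注修改测试");

            // Keep only the surface form of each term. Term.toString() returns
            // "word/nature", so term.word matches the
            // term.toString().substring(0, term.length()) idiom used in the patch.
            List<String> wordList = new ArrayList<>();
            for (Term term : termList) {
                wordList.add(term.word);
            }
            System.out.println(wordList);   // e.g. [词性标注, 修改, 测试]
        }
    }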
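
Two follow-up observations on this design: reading Term.word avoids re-serializing each term just to strip the "/nature" suffix, and CustomDictionary.add(...) only updates the dictionary held in memory by the running process, so words uploaded through these endpoints are not persisted by HanLP itself and would need separate storage to survive a restart.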