词性标注修改

master
yz 2 years ago
parent a7708baaee
commit 58a473a878

@ -2,9 +2,13 @@ package com.ruoyi.biemo.business.controller;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.ruoyi.biemo.business.domain.Category;
import com.ruoyi.biemo.business.domain.DocInfo;
import com.ruoyi.biemo.mongodb.entity.SplitWordDrill;
@ -13,6 +17,7 @@ import com.ruoyi.biemo.business.service.DocInfoService;
import com.ruoyi.biemo.core.page.Page;
import com.ruoyi.biemo.core.page.PageFactory;
import com.ruoyi.biemo.nlp.DependencyParserUtils;
import com.ruoyi.biemo.nlp.SentimentAnalysisUtils;
import com.ruoyi.biemo.nlp.TextClassificationUtils;
import com.ruoyi.biemo.utils.MyObjects;
import com.ruoyi.common.annotation.Log;
@ -20,6 +25,7 @@ import com.ruoyi.common.core.controller.BaseController;
import com.ruoyi.common.core.domain.AjaxResult;
import com.ruoyi.common.enums.BusinessType;
import com.ruoyi.common.utils.poi.ExcelUtil;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.query.Criteria;
@ -29,9 +35,7 @@ import org.springframework.web.bind.annotation.*;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.*;
/**
@ -302,47 +306,107 @@ public class DocInfoController extends BaseController {
// docInfoService.insertEs(docInfo);
// }
@PostMapping ("/splitWord")
public AjaxResult splitWord(@RequestBody DocInfo docInfo){
List<String> strings = docInfoService.analyzeTitle(docInfo);
docInfo.setAnalyzeTitle(strings);
return AjaxResult.success("分词成功",docInfo);
/**
 * Segments the request body text with a HanLP segmenter and returns the words.
 *
 * @param str text to segment, bound from the request body
 * @return success result ("分词成功") carrying the segmented words in order
 */
@PostMapping ("/splitWordStr")
public AjaxResult splitWord(@RequestBody String str){
    Segment segment = HanLP.newSegment();
    List<Term> termList = segment.seg(str);
    List<String> wordList = new ArrayList<>(termList.size());
    for (Term term : termList) {
        // Read the surface form directly rather than slicing it back out of
        // toString(), whose layout is controlled by HanLP configuration.
        wordList.add(term.word);
    }
    return AjaxResult.success("分词成功", wordList);
}
/**
 * Segments the title of the given document with a HanLP segmenter and stores
 * the resulting words on the document as its analyzeTitle list.
 *
 * @param docInfo document whose title is segmented; a null title produces an
 *                empty word list instead of failing inside the segmenter
 * @return success result ("分词成功") carrying the same document with
 *         analyzeTitle populated
 */
@PostMapping ("/splitWordDocInfo")
public AjaxResult splitWordNew(@RequestBody DocInfo docInfo){
    List<String> wordList = new ArrayList<>();
    String title = docInfo.getTitle();
    // Guard: a request without a title would otherwise hand null to the segmenter.
    if (title != null) {
        Segment segment = HanLP.newSegment();
        // Walk the segmentation result and keep each term's surface form.
        for (Term term : segment.seg(title)) {
            wordList.add(term.word);
        }
    }
    docInfo.setAnalyzeTitle(wordList);
    return AjaxResult.success("分词成功", docInfo);
}
/**
*
*
* @param docInfo
* @param splitResult
* @return
*/
@PostMapping("/uploadSplitWord")
public AjaxResult uploadsplitWord(@RequestBody DocInfo docInfo) {
Query query = new Query(Criteria.where("_id").is(docInfo.getId()));
Update update = new Update();
update.set("splitWordResult", docInfo.getSplitWordResult());
mongoTemplate.upsert(query, update, DocInfo.class);
return AjaxResult.success("上传成功");
/**
 * Splits the request body on "/" and adds every non-blank piece to HanLP's
 * CustomDictionary.
 *
 * @param splitResult "/"-separated words, bound from the request body
 * @return success result ("分词上传成功"), or an error result ("格式不正确")
 *         if any exception is raised while processing
 */
@PostMapping("/uploadSplitWordStr")
public AjaxResult uploadSplitWordNew(@RequestBody String splitResult) {
    try {
        for (String candidate : splitResult.split("/")) {
            // Blank pieces (e.g. from a doubled separator) are skipped.
            if (StringUtils.isNotBlank(candidate)) {
                CustomDictionary.add(candidate);
            }
        }
        return AjaxResult.success("分词上传成功");
    } catch (Exception e) {
        return AjaxResult.error("格式不正确");
    }
}
/**
*
*
* @param splitWordDrill
* @return
*/
@PostMapping("/uploadSplitWordDrill")
public AjaxResult uploadSplitWordDrill(@RequestBody SplitWordDrill splitWordDrill) {
for (String data : splitWordDrill.getSplitResult()) {
mongoTemplate.updateFirst(
Query.query(Criteria.where("_id").is(splitWordDrill.getId())), // 查询条件
new Update()
.push("splitResult").each(data)
.currentDate("updateTime"),
// .currentTimestamp("updateTime"),
"splitWordDrill");
/**
 * Adds every non-blank entry of the document's analyzeTitle list to HanLP's
 * CustomDictionary, then sets splitWordStatus to "已分词" on the document
 * matching the given id.
 *
 * @param docInfo document carrying the id and the analyzeTitle word list
 * @return success result ("分词上传成功"), or an error result ("格式不正确")
 *         when the word list is missing or any exception is raised
 */
@PostMapping("/uploadSplitWordDocInfo")
public AjaxResult uploadSplitWordNew(@RequestBody DocInfo docInfo) {
    try {
        List<String> analyzeTitleList = docInfo.getAnalyzeTitle();
        if (analyzeTitleList == null) {
            // Explicit check instead of relying on a caught NullPointerException;
            // the response is the same as before.
            return AjaxResult.error("格式不正确");
        }
        for (String word : analyzeTitleList) {
            if (StringUtils.isNotBlank(word)) {
                CustomDictionary.add(word);
            }
        }
        Query query = new Query(Criteria.where("_id").is(docInfo.getId()));
        Update update = new Update();
        update.set("splitWordStatus", "已分词");
        // NOTE(review): upsert inserts a new document when nothing matches the
        // _id — confirm that is intended rather than updateFirst.
        mongoTemplate.upsert(query, update, DocInfo.class);
        return AjaxResult.success("分词上传成功");
    } catch (Exception e) {
        return AjaxResult.error("格式不正确");
    }
    // The former trailing return was unreachable (both branches above return)
    // and has been removed.
}
// /**
// * 分词结果上传
// *
// * @param docInfo
// * @return
// */
// @PostMapping("/uploadSplitWord")
// public AjaxResult uploadsplitWord(@RequestBody DocInfo docInfo) {
// Query query = new Query(Criteria.where("_id").is(docInfo.getId()));
// Update update = new Update();
// update.set("splitWordResult", docInfo.getSplitWordResult());
// mongoTemplate.upsert(query, update, DocInfo.class);
// return AjaxResult.success("上传成功");
// }
//
// /**
// * 分词词训练结果上传
// *
// * @param splitWordDrill
// * @return
// */
// @PostMapping("/uploadSplitWordDrill")
// public AjaxResult uploadSplitWordDrill(@RequestBody SplitWordDrill splitWordDrill) {
// for (String data : splitWordDrill.getSplitResult()) {
// mongoTemplate.updateFirst(
// Query.query(Criteria.where("_id").is(splitWordDrill.getId())), // 查询条件
// new Update()
// .push("splitResult").each(data)
// .currentDate("updateTime"),
//// .currentTimestamp("updateTime"),
// "splitWordDrill");
// }
// return AjaxResult.success("上传成功");
// }
}

@ -63,7 +63,7 @@ public class DocInfo extends BiemoEntity {
// Word-segmentation data
private List<String> analyzeTitle;
// Word-segmentation result
private String splitWordResult;
// Word-segmentation status: "已分词" (segmented) or "未分词" (not segmented)
private String splitWordStatus;
}

Loading…
Cancel
Save