From 362b430ba6d6fc7f545daaf4857d97ec3276a4d6 Mon Sep 17 00:00:00 2001 From: hujunbo <9094908@qq.com> Date: Wed, 2 Nov 2022 21:51:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9F=BA=E6=9C=AC=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../main/java/com/ruoyi/task/SyncData.java | 38 +++++++++++++++++-- .../controller/CategoryController.java | 10 ++++- .../controller/DocInfoController.java | 13 ++++++- .../business/controller/ReportController.java | 24 ++++++++++++ .../ruoyi/biemo/business/domain/Report.java | 7 +++- .../biemo/nlp/SentimentAnalysisUtils.java | 31 +++++++++++++-- .../biemo/nlp/TextClassificationUtils.java | 32 ++++++++++++++++ .../demo/DemoPerceptronLexicalAnalyzer.java | 19 +++++----- 8 files changed, 156 insertions(+), 18 deletions(-) diff --git a/ruoyi-admin/src/main/java/com/ruoyi/task/SyncData.java b/ruoyi-admin/src/main/java/com/ruoyi/task/SyncData.java index 695dea4..4c7ef69 100644 --- a/ruoyi-admin/src/main/java/com/ruoyi/task/SyncData.java +++ b/ruoyi-admin/src/main/java/com/ruoyi/task/SyncData.java @@ -1,7 +1,11 @@ package com.ruoyi.task; +import com.ruoyi.biemo.business.domain.Category; import com.ruoyi.biemo.business.domain.DocInfo; +import com.ruoyi.biemo.business.domain.Report; +import com.ruoyi.biemo.business.service.CategoryService; import com.ruoyi.biemo.business.service.DocInfoService; +import com.ruoyi.biemo.business.service.ReportService; import com.ruoyi.biemo.mongodb.bean.Page; import com.ruoyi.biemo.mongodb.utils.CriteriaAndWrapper; import com.ruoyi.biemo.mongodb.utils.MongoHelper; @@ -15,7 +19,10 @@ public class SyncData { MongoHelper mongoHelper; @Autowired DocInfoService docInfoService; - + @Autowired + CategoryService categoryService; + @Autowired + ReportService reportService; public void mongoToEs(){ Page page = new Page(); Page pageInfo = mongoHelper.findPage(new CriteriaAndWrapper().eq(DocInfo::getIsSync,0),page, DocInfo.class); @@ -24,15 +31,40 @@ public class SyncData { docInfos.forEach(docInfo -> { docInfo.setCreatedBy(1L); docInfo.setUpdatedBy(1L); - docInfoService.insertEs(docInfo); + docInfo.setCreateTime(System.currentTimeMillis()); + docInfo.setUpdateTime(System.currentTimeMillis()); docInfo.setIsSync(1); docInfoService.insertOrUpdateDocInfo(docInfo); }); } } - public void mysqlToMongo(){ + public void categoryToEs(){ + Page page = new Page(); + Page pageInfo = mongoHelper.findPage(new CriteriaAndWrapper().eq(Category::getIsSync,0),page, Category.class); + List categories = pageInfo.getList(); + if(categories!=null&& categories.size()>0){ + categories.forEach(category -> { + category.setCreatedBy(1L); + category.setUpdatedBy(1L); + category.setIsSync(1); + categoryService.insertOrUpdateCategory(category); + }); + } + } + public void reportToEs(){ + Page page = new Page(); + Page pageInfo = mongoHelper.findPage(new CriteriaAndWrapper().eq(Report::getIsSync,0),page, Report.class); + List reports = pageInfo.getList(); + if(reports!=null&& reports.size()>0){ + reports.forEach(report -> { + report.setCreatedBy(1L); + report.setUpdatedBy(1L); + report.setIsSync(1); + reportService.insertOrUpdateReport(report); + }); + } } } diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/CategoryController.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/CategoryController.java index 6de8b98..f10cd5d 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/CategoryController.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/CategoryController.java @@ -4,6 +4,7 @@ import com.ruoyi.biemo.business.domain.Category; import com.ruoyi.biemo.business.service.CategoryService; import com.ruoyi.biemo.core.page.Page; import com.ruoyi.biemo.core.page.PageFactory; +import com.ruoyi.biemo.nlp.TextClassificationUtils; import com.ruoyi.common.annotation.Log; import com.ruoyi.common.core.domain.AjaxResult; import com.ruoyi.common.enums.BusinessType; @@ -11,6 +12,7 @@ import com.ruoyi.common.utils.poi.ExcelUtil; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.web.bind.annotation.*; +import org.springframework.web.multipart.MultipartFile; import javax.servlet.http.HttpServletResponse; import java.util.List; @@ -61,8 +63,14 @@ public class CategoryController { @PreAuthorize("@ss.hasPermi('biemo:category:add')") @Log(title = "分类管理", businessType = BusinessType.INSERT) @RequestMapping - public AjaxResult add(@RequestBody Category category) + public AjaxResult add(MultipartFile[] files , String id,String name,String parentId,Integer orderNum) { + Category category = new Category(); + category.setId(id); + category.setName(name); + category.setParentId(parentId); + category.setOrderNum(orderNum); + TextClassificationUtils.myTrainOrLoadModel(files,name); categoryService.insertOrUpdateCategory(category); return AjaxResult.success(); } diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java index 799f6ab..01031ca 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java @@ -12,6 +12,7 @@ import com.ruoyi.biemo.business.service.DocInfoService; import com.ruoyi.biemo.core.page.Page; import com.ruoyi.biemo.core.page.PageFactory; import com.ruoyi.biemo.nlp.DependencyParserUtils; +import com.ruoyi.biemo.nlp.TextClassificationUtils; import com.ruoyi.biemo.utils.MyObjects; import com.ruoyi.common.annotation.Log; import com.ruoyi.common.core.controller.BaseController; @@ -164,7 +165,6 @@ public class DocInfoController extends BaseController { return AjaxResult.success(); } - /** * 删除文章管理 */ @@ -190,4 +190,15 @@ public class DocInfoController extends BaseController { public AjaxResult summary(@PathVariable String id){ return AjaxResult.success(docInfoService.summary(id)); } + + @GetMapping("/handleClassification/{id}") + public AjaxResult handleClassification(@PathVariable String id){ + DocInfo docInfo = docInfoService.selectDocInfoById(id); + JSONObject result = new JSONObject(); + String content = docInfo.getContent(); + content = MyObjects.delHTMLTag(content); + content = MyObjects.delSpace(content); + return AjaxResult.success("查询成功",TextClassificationUtils.getClassification(content)); + } + } diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/ReportController.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/ReportController.java index 50192be..df5e5b5 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/ReportController.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/ReportController.java @@ -8,9 +8,13 @@ import com.ruoyi.biemo.nlp.SentimentAnalysisUtils; import com.ruoyi.biemo.nlp.WordVectorModelUtils; import com.ruoyi.common.annotation.Log; import com.ruoyi.common.core.domain.AjaxResult; +import com.ruoyi.common.core.domain.entity.SysDept; +import com.ruoyi.common.core.domain.entity.SysUser; import com.ruoyi.common.enums.BusinessType; import com.ruoyi.common.utils.SecurityUtils; import com.ruoyi.common.utils.poi.ExcelUtil; +import com.ruoyi.system.service.ISysDeptService; +import com.ruoyi.system.service.ISysUserService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.web.bind.annotation.*; @@ -34,6 +38,12 @@ public class ReportController @Autowired private ReportService reportService; + @Autowired + private ISysUserService userService; + + @Autowired + private ISysDeptService deptService; + /** * 查询分类管理列表 */ @@ -46,6 +56,20 @@ public class ReportController return page; } + @GetMapping("/indexList") + public Page indexList(Report report) + { + Page page = reportService.selectReportPage(report, PageFactory.defaultPage()); + //Page page = reportService.getReportList(SecurityUtils.getUsername()); +// page.getRows().forEach(report1 -> { +// SysUser user = userService.selectUserById(report1.getCreatedBy()); +// report1.setUser(user); +// SysDept dept = deptService.selectDeptById(user.getDeptId()); +// report1.setDept(dept); +// }); + return page; + } + /** * 导出分类管理列表 */ diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/Report.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/Report.java index 7fb2fee..fe517d2 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/Report.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/Report.java @@ -2,6 +2,8 @@ package com.ruoyi.biemo.business.domain; import com.ruoyi.biemo.elasticsearch.annotation.EsId; import com.ruoyi.biemo.elasticsearch.annotation.FieldInfo; +import com.ruoyi.common.core.domain.entity.SysDept; +import com.ruoyi.common.core.domain.entity.SysUser; import lombok.Data; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; @@ -33,6 +35,10 @@ public class Report extends BiemoEntity { */ @FieldInfo(type = "string", participle = 3) private String title; + + + @FieldInfo(type = "keyword",participle = 0) + private Integer isSync; /** * 状态 */ @@ -58,5 +64,4 @@ public class Report extends BiemoEntity { private Long startTime; private Long timeUsed; private List steps; - } diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java index adee713..3777624 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/SentimentAnalysisUtils.java @@ -2,6 +2,8 @@ package com.ruoyi.biemo.nlp; import com.hankcs.hanlp.classification.classifiers.IClassifier; import com.hankcs.hanlp.classification.classifiers.NaiveBayesClassifier; +import com.hankcs.hanlp.classification.models.NaiveBayesModel; +import com.hankcs.hanlp.corpus.io.IOUtil; import com.ruoyi.biemo.utils.TestUtility; import org.springframework.web.multipart.MultipartFile; @@ -15,18 +17,39 @@ import java.util.List; */ public class SentimentAnalysisUtils { public static final String CORPUS_FOLDER = TestUtility.ensureTestData("ChnSentiCorp情感分析酒店评论", "http://hanlp.linrunsoft.com/release/corpus/ChnSentiCorp.zip"); + public static final String MODEL_PATH = "data/test/sentiment-classification-model.ser"; + public static String analysis(String text){ String result = ""; - IClassifier classifier = new NaiveBayesClassifier(); try { - classifier.train(CORPUS_FOLDER); + IClassifier classifier = new NaiveBayesClassifier(trainOrLoadModel()); result = classifier.classify(text); - } catch (IOException ioException) { + } catch (Exception ioException) { ioException.printStackTrace(); } return result; } + //训练模型 + private static NaiveBayesModel trainOrLoadModel() throws IOException + { + NaiveBayesModel model = (NaiveBayesModel) IOUtil.readObjectFrom(MODEL_PATH); + if (model != null) return model; + + File corpusFolder = new File(CORPUS_FOLDER); + if (!corpusFolder.exists() || !corpusFolder.isDirectory()) + { + System.err.println("没有文本分类语料,请阅读IClassifier.train(java.lang.String)中定义的语料格式与语料下载:") ; + System.exit(1); + } + + IClassifier classifier = new NaiveBayesClassifier(); // 创建分类器,更高级的功能请参考IClassifier的接口定义 + classifier.train(CORPUS_FOLDER); // 训练后的模型支持持久化,下次就不必训练了 + model = (NaiveBayesModel) classifier.getModel(); + IOUtil.saveObjectTo(model, MODEL_PATH); + return model; + } + public static Object analysisImport(MultipartFile[] files, String name) { if(files!=null&&files.length>0){ File file1 = new File(CORPUS_FOLDER+"/"+name); @@ -48,6 +71,8 @@ public class SentimentAnalysisUtils { IClassifier classifier = new NaiveBayesClassifier(); try { classifier.train(CORPUS_FOLDER); + NaiveBayesModel model = (NaiveBayesModel) classifier.getModel(); + IOUtil.saveObjectTo(model, MODEL_PATH); } catch (IOException ioException) { ioException.printStackTrace(); } diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/TextClassificationUtils.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/TextClassificationUtils.java index 0970a9e..87fe9a3 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/TextClassificationUtils.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/nlp/TextClassificationUtils.java @@ -5,6 +5,7 @@ import com.hankcs.hanlp.classification.classifiers.NaiveBayesClassifier; import com.hankcs.hanlp.classification.models.NaiveBayesModel; import com.hankcs.hanlp.corpus.io.IOUtil; import com.ruoyi.biemo.utils.TestUtility; +import org.springframework.web.multipart.MultipartFile; import java.io.File; import java.io.IOException; @@ -52,4 +53,35 @@ public class TextClassificationUtils { return model; } + //我的训练模型 + public static Object myTrainOrLoadModel(MultipartFile[] files, String name) { + if(files!=null&&files.length>0){ + File file1 = new File(CORPUS_FOLDER+"/"+name); + if(!file1.getParentFile().exists()){ + file1.mkdirs(); + } + if(!file1.exists()){ + file1.mkdirs(); + } + for(int i=0;i wordList = new ArrayList<>(); Word word = new Word(null,null); - word.setLabel("nt"); - word.setValue("俄乌冲突"); + word.setLabel("中国光大银行"); + word.setValue("nt"); wordList.add(word); Sentence sentence = new Sentence(wordList); - analyzer.learn(sentence); + //analyzer.learn(sentence); // 学习到新知识 - System.out.println(analyzer.analyze("总统普京与特朗普通电话讨论太空探索技术公司,麻皮粗壮是我的名字")); + System.out.println(HanLP.newSegment().seg2sentence(text)); + // 还可以举一反三 //System.out.println(analyzer.analyze("主席和特朗普通电话")); @@ -63,9 +64,9 @@ public class DemoPerceptronLexicalAnalyzer extends TestUtility //System.out.println(analyzer.analyze("我在四川金华出生,我的名字叫金华")); // 在线学习后的模型支持序列化,以分词模型为例: - analyzer.getPerceptronSegmenter().getModel().save(HanLP.Config.PerceptronCWSModelPath); - analyzer.getPerceptronPOSTagger().getModel().save(HanLP.Config.PerceptronPOSModelPath); - analyzer.getPerceptionNERecognizer().getModel().save(HanLP.Config.PerceptronNERModelPath); + //analyzer.getPerceptronSegmenter().getModel().save(HanLP.Config.PerceptronCWSModelPath); + //analyzer.getPerceptronPOSTagger().getModel().save(HanLP.Config.PerceptronPOSModelPath); + //analyzer.getPerceptionNERecognizer().getModel().save(HanLP.Config.PerceptronNERModelPath); // 请用户按需执行对空格制表符等的预处理,只有你最清楚自己的文本中都有些什么奇怪的东西 // System.out.println(analyzer.analyze("空格 \t\n\r\f 统统都不要" // .replaceAll("\\s+", "") // 去除所有空白符