From 6eb60c7e56acc5921ee9b354714244ccf40e3706 Mon Sep 17 00:00:00 2001 From: yz <3614508250@qq.com> Date: Thu, 18 May 2023 16:22:29 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=87=E6=9C=AC=E5=A4=A7=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E9=A1=B9=E7=9B=AE=E4=B8=8A=E4=BC=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../controller/DocInfoController.java | 19 ++++++ .../ruoyi/biemo/business/domain/DocInfo.java | 5 ++ .../business/service/DocInfoService.java | 1 + .../biemo/elasticsearch/util/EsService.java | 65 +++++++++++++++++-- 4 files changed, 84 insertions(+), 6 deletions(-) diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java index 1ac3164..3680c6d 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/controller/DocInfoController.java @@ -5,6 +5,8 @@ import com.alibaba.fastjson.JSONObject; import com.hankcs.hanlp.corpus.document.sentence.Sentence; import com.hankcs.hanlp.corpus.document.sentence.word.IWord; import com.hankcs.hanlp.corpus.document.sentence.word.Word; +import com.hankcs.hanlp.seg.common.Term; +import com.hankcs.hanlp.tokenizer.NotionalTokenizer; import com.ruoyi.biemo.business.domain.Category; import com.ruoyi.biemo.business.domain.DocInfo; import com.ruoyi.biemo.business.service.CategoryService; @@ -43,6 +45,7 @@ public class DocInfoController extends BaseController { private DocInfoService docInfoService; @Autowired private CategoryService categoryService; + /** * 查询文章管理列表 */ @@ -201,4 +204,20 @@ public class DocInfoController extends BaseController { return AjaxResult.success("查询成功",TextClassificationUtils.getClassification(content)); } +// @PostMapping("/createEsIndex") +// public void createEsIndex(){ +// DocInfo docInfo=new DocInfo(); +// docInfoService.insertEs(docInfo); +// } + + @PostMapping ("/splitWord") + public AjaxResult splitWord(@RequestBody DocInfo docInfo){ + List strings = docInfoService.analyzeTitle(docInfo); + docInfo.setAnalyzeTitle(strings); + return AjaxResult.success("分词成功",docInfo); + } + + //分词结果上传 + + } diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/DocInfo.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/DocInfo.java index a62e18b..c7caac3 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/DocInfo.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/domain/DocInfo.java @@ -5,6 +5,8 @@ import com.ruoyi.biemo.elasticsearch.annotation.FieldInfo; import lombok.Data; import org.springframework.data.elasticsearch.annotations.Document; +import java.util.List; + @Data @Document(indexName = "doc_info") @@ -58,4 +60,7 @@ public class DocInfo extends BiemoEntity { private String cateName; + //分词数据 + private List analyzeTitle; + } diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java index 39c280f..e937cef 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/business/service/DocInfoService.java @@ -15,6 +15,7 @@ import com.ruoyi.biemo.nlp.DependencyParserUtils; import com.ruoyi.biemo.nlp.SummaryUtils; import com.ruoyi.biemo.utils.MyObjects; import com.ruoyi.common.utils.StringUtils; +import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.sort.SortOrder; diff --git a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/elasticsearch/util/EsService.java b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/elasticsearch/util/EsService.java index 1faf575..13cbada 100644 --- a/ruoyi-biemo/src/main/java/com/ruoyi/biemo/elasticsearch/util/EsService.java +++ b/ruoyi-biemo/src/main/java/com/ruoyi/biemo/elasticsearch/util/EsService.java @@ -1,6 +1,8 @@ package com.ruoyi.biemo.elasticsearch.util; import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; import com.ruoyi.biemo.business.domain.DocInfo; import com.ruoyi.biemo.core.page.Page; import com.ruoyi.biemo.elasticsearch.annotation.EsId; @@ -11,7 +13,10 @@ import com.ruoyi.common.exception.CustomException; import com.ruoyi.common.exception.ServiceException; import lombok.Data; import org.apache.commons.lang3.StringUtils; +import org.apache.http.util.EntityUtils; import org.apache.poi.ss.formula.functions.T; +import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction; +import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; import org.elasticsearch.action.bulk.BulkRequest; @@ -21,8 +26,7 @@ import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchScrollRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; -import org.elasticsearch.client.RequestOptions; -import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.client.*; import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.indices.CreateIndexResponse; import org.elasticsearch.client.indices.GetIndexRequest; @@ -126,8 +130,8 @@ public abstract class EsService { } /** * 创建mapping - * @param index 索引 - * @param type 类型 + * @param + * @param * @param clazz 索引类型 */ public boolean createIndexAndCreateMapping( Class clazz, boolean rebuild, int number_of_shards, int number_of_replicas) { @@ -161,9 +165,9 @@ public abstract class EsService { * 根据信息自动创建索引与mapping * 构建mapping描述 * @param index 索引名称 - * @param type 类型名称 + * @param * @param fieldMappingList 字段信息 - * @param client es客户端 + * @param * @param number_of_shards 分片数 * @param number_of_replicas 副本数 * @return @@ -872,6 +876,55 @@ public abstract class EsService { return (Class) params[index]; } + //分词 返回分词结果 + public List analyzeTitle(DocInfo docInfo){ + try { + RestHighLevelClient client = ElasticSearchPoolUtil.getClient(); + + Request request = new Request("GET", "_analyze"); + JSONObject entity = new JSONObject(); + entity.put("analyzer", "hanlp_index"); + entity.put("text", docInfo.getTitle()); + request.setJsonEntity(entity.toJSONString()); + Response response = client.getLowLevelClient().performRequest(request); + JSONObject tokens = JSONObject.parseObject(EntityUtils.toString(response.getEntity())); + JSONArray arrays = tokens.getJSONArray("tokens"); + List list=new ArrayList<>(); + for (int i = 0; i < arrays.size(); i++) { + JSONObject obj = JSON.parseObject(arrays.getString(i)); + list.add(obj.getString("token")); + } + return list; + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } + +// //分词 返回分词结果 +// public List analyzeTitle(DocInfo docInfo){ +// try { +// RestHighLevelClient client = ElasticSearchPoolUtil.getClient(); +// RestClient lowLevelClient = client.getLowLevelClient(); +// Request request = new Request("GET", "_analyze"); +// JSONObject entity = new JSONObject(); +// entity.put("analyzer", "ik_max_word"); +// entity.put("text", text); +// AnalyzeRequestBuilder analyzeRequestBuilder = new AnalyzeRequestBuilder((ElasticsearchClient) lowLevelClient,AnalyzeAction.INSTANCE,"doc_info",docInfo.getTitle()); +// analyzeRequestBuilder.setAnalyzer("hanlp_index"); +// List analyzeTokenList = analyzeRequestBuilder.execute().actionGet().getTokens(); +// List searchTermList = new ArrayList<>(); +// for (int i = 0; i < analyzeTokenList.size(); i++) { +// searchTermList.add(String.valueOf(analyzeTokenList.get(i))); +// } +// System.out.println("111"); +// return searchTermList; +// } catch (Exception e) { +// e.printStackTrace(); +// } +// return null; +// } + }