文本大数据项目上传

master
yz 2 years ago
parent 51f3c9c823
commit 6eb60c7e56

@ -5,6 +5,8 @@ import com.alibaba.fastjson.JSONObject;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.corpus.document.sentence.word.IWord;
import com.hankcs.hanlp.corpus.document.sentence.word.Word;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.NotionalTokenizer;
import com.ruoyi.biemo.business.domain.Category;
import com.ruoyi.biemo.business.domain.DocInfo;
import com.ruoyi.biemo.business.service.CategoryService;
@ -43,6 +45,7 @@ public class DocInfoController extends BaseController {
private DocInfoService docInfoService;
@Autowired
private CategoryService categoryService;
/**
*
*/
@ -201,4 +204,20 @@ public class DocInfoController extends BaseController {
return AjaxResult.success("查询成功",TextClassificationUtils.getClassification(content));
}
// @PostMapping("/createEsIndex")
// public void createEsIndex(){
// DocInfo docInfo=new DocInfo();
// docInfoService.insertEs(docInfo);
// }
/**
 * Splits the title of the submitted document into word tokens and returns
 * the document with its {@code analyzeTitle} field populated.
 *
 * @param docInfo document whose title is to be tokenized
 * @return success result carrying the updated document
 */
@PostMapping ("/splitWord")
public AjaxResult splitWord(@RequestBody DocInfo docInfo){
    // Delegate tokenization to the service layer, then echo the enriched entity back.
    docInfo.setAnalyzeTitle(docInfoService.analyzeTitle(docInfo));
    return AjaxResult.success("分词成功",docInfo);
}
//分词结果上传
}

@ -5,6 +5,8 @@ import com.ruoyi.biemo.elasticsearch.annotation.FieldInfo;
import lombok.Data;
import org.springframework.data.elasticsearch.annotations.Document;
import java.util.List;
@Data
@Document(indexName = "doc_info")
@ -58,4 +60,7 @@ public class DocInfo extends BiemoEntity {
private String cateName;
//分词数据
private List<String> analyzeTitle;
}

@ -15,6 +15,7 @@ import com.ruoyi.biemo.nlp.DependencyParserUtils;
import com.ruoyi.biemo.nlp.SummaryUtils;
import com.ruoyi.biemo.utils.MyObjects;
import com.ruoyi.common.utils.StringUtils;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortOrder;

@ -1,6 +1,8 @@
package com.ruoyi.biemo.elasticsearch.util;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.ruoyi.biemo.business.domain.DocInfo;
import com.ruoyi.biemo.core.page.Page;
import com.ruoyi.biemo.elasticsearch.annotation.EsId;
@ -11,7 +13,10 @@ import com.ruoyi.common.exception.CustomException;
import com.ruoyi.common.exception.ServiceException;
import lombok.Data;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.util.EntityUtils;
import org.apache.poi.ss.formula.functions.T;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeAction;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.bulk.BulkRequest;
@ -21,8 +26,7 @@ import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.*;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.client.indices.GetIndexRequest;
@ -126,8 +130,8 @@ public abstract class EsService<T> {
}
/**
 * Creates the index for the given entity class and applies its field mapping.
 * @param clazz entity class whose annotations define the index mapping
 * @param rebuild whether to drop and recreate the index if it already exists
 * @param number_of_shards number of primary shards for the new index
 * @param number_of_replicas number of replica copies per shard
 */
public boolean createIndexAndCreateMapping( Class clazz, boolean rebuild, int number_of_shards, int number_of_replicas) {
@ -161,9 +165,9 @@ public abstract class EsService<T> {
* mapping
* mapping
* @param index
* @param type
* @param
* @param fieldMappingList
* @param client es
* @param
* @param number_of_shards
* @param number_of_replicas
* @return
@ -872,6 +876,55 @@ public abstract class EsService<T> {
return (Class) params[index];
}
/**
 * Tokenizes the document title by calling Elasticsearch's {@code _analyze}
 * endpoint (low-level REST client) with the {@code hanlp_index} analyzer.
 *
 * @param docInfo document whose title is analyzed; may have a null/blank title
 * @return list of token strings; empty (never {@code null}) when the title is
 *         blank or the analyze call fails
 */
public List<String> analyzeTitle(DocInfo docInfo){
    // Nothing to analyze — skip the round trip (ES rejects a null "text" field anyway).
    if (docInfo == null || StringUtils.isBlank(docInfo.getTitle())) {
        return new ArrayList<>();
    }
    RestHighLevelClient client = null;
    try {
        client = ElasticSearchPoolUtil.getClient();
        Request request = new Request("GET", "_analyze");
        JSONObject entity = new JSONObject();
        entity.put("analyzer", "hanlp_index");
        entity.put("text", docInfo.getTitle());
        request.setJsonEntity(entity.toJSONString());
        Response response = client.getLowLevelClient().performRequest(request);
        JSONObject result = JSONObject.parseObject(EntityUtils.toString(response.getEntity()));
        JSONArray tokens = result.getJSONArray("tokens");
        List<String> list = new ArrayList<>(tokens.size());
        for (int i = 0; i < tokens.size(); i++) {
            // getJSONObject avoids serializing each element back to a string and re-parsing it.
            list.add(tokens.getJSONObject(i).getString("token"));
        }
        return list;
    } catch (Exception e) {
        // Fail soft: previous behavior returned null here, forcing every caller to null-check.
        e.printStackTrace();
        return new ArrayList<>();
    } finally {
        // The client comes from a pool; not returning it leaks a connection per call.
        if (client != null) {
            ElasticSearchPoolUtil.returnClient(client); // NOTE(review): confirm the pool's return-method name
        }
    }
}
// //分词 返回分词结果
// public List<String> analyzeTitle(DocInfo docInfo){
// try {
// RestHighLevelClient client = ElasticSearchPoolUtil.getClient();
// RestClient lowLevelClient = client.getLowLevelClient();
// Request request = new Request("GET", "_analyze");
// JSONObject entity = new JSONObject();
// entity.put("analyzer", "ik_max_word");
// entity.put("text", text);
// AnalyzeRequestBuilder analyzeRequestBuilder = new AnalyzeRequestBuilder((ElasticsearchClient) lowLevelClient,AnalyzeAction.INSTANCE,"doc_info",docInfo.getTitle());
// analyzeRequestBuilder.setAnalyzer("hanlp_index");
// List<AnalyzeAction.AnalyzeToken> analyzeTokenList = analyzeRequestBuilder.execute().actionGet().getTokens();
// List<String> searchTermList = new ArrayList<>();
// for (int i = 0; i < analyzeTokenList.size(); i++) {
// searchTermList.add(String.valueOf(analyzeTokenList.get(i)));
// }
// System.out.println("111");
// return searchTermList;
// } catch (Exception e) {
// e.printStackTrace();
// }
// return null;
// }
}

Loading…
Cancel
Save