@@ -5,6 +5,7 @@ import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.sztzjy.marketing.annotation.AnonymousAccess;
import com.sztzjy.marketing.entity.dto.CommentDTO;
import com.sztzjy.marketing.entity.dto.SentimentAnalyDTO;
import com.sztzjy.marketing.qianfan.util.Json;
import com.sztzjy.marketing.util.ResultEntity;
@@ -17,8 +18,7 @@ import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.io.*;
import java.util.List;
import java.util.Map;
import java.util.*;


@RestController
@@ -141,4 +141,234 @@ public class StuPythonController {
        }
    }

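    /**
     * Aspect-level opinion extraction for a single comment ("评论点抽取"). The request body is
     * expected to look like {"text": "经济实惠、动力不错、油耗低"}; the text is spliced into a PaddleNLP
     * SKEP script, the script is run inside the "pyexe" Docker container, and the parsed
     * output is returned as a list of CommentDTO.
     */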
    @PostMapping("/comment")
    @ApiOperation("评论点抽取")
    @AnonymousAccess
    public ResultEntity comment(@RequestBody JSONObject text) {
        // System.out.println(text);
        String testText = text.getString("text");
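
        // The embedded script below assumes the execution environment (the pyexe container's
        // working directory) provides paddle/paddlenlp, a local "utils" package, the
        // label_ext.dict / label_cls.dict files, and the trained SKEP weights under ./model/.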
        String code = "# 导入所需依赖\n" +
                "import pandas as pd\n" +
                "import paddle\n" +
                "from paddlenlp.transformers import SkepTokenizer, SkepModel\n" +
                "from utils.utils import decoding, concate_aspect_and_opinion, format_print\n" +
                "from utils import data_ext, data_cls\n" +
                "from utils.model_define import SkepForTokenClassification, SkepForSequenceClassification\n" +
                "\n" +
                "\n" +
                "# 单条文本情感分析预测函数\n" +
                "def predict(input_text, ext_model, cls_model, tokenizer, ext_id2label, cls_id2label, max_length=512):\n" +
                "    ext_model.eval()\n" +
                "    cls_model.eval()\n" +
                "\n" +
                "    # processing input text\n" +
                "    encoded_inputs = tokenizer(list(input_text), is_split_into_words=True, max_length=max_length,)\n" +
                "    input_ids = paddle.to_tensor([encoded_inputs[\"input_ids\"]])\n" +
                "    token_type_ids = paddle.to_tensor([encoded_inputs[\"token_type_ids\"]])\n" +
                "\n" +
                "    # extract aspect and opinion words\n" +
                "    logits = ext_model(input_ids, token_type_ids=token_type_ids)\n" +
                "    predictions = logits.argmax(axis=2).numpy()[0]\n" +
                "    tag_seq = [ext_id2label[idx] for idx in predictions][1:-1]\n" +
                "    aps = decoding(input_text, tag_seq)\n" +
                "\n" +
                "    # predict sentiment for aspect with cls_model\n" +
                "    results = []\n" +
                "    for ap in aps:\n" +
                "        aspect = ap[0]\n" +
                "        opinion_words = list(set(ap[1:]))\n" +
                "        aspect_text = concate_aspect_and_opinion(input_text, aspect, opinion_words)\n" +
                "\n" +
                "        encoded_inputs = tokenizer(aspect_text, text_pair=input_text, max_length=max_length, return_length=True)\n" +
                "        input_ids = paddle.to_tensor([encoded_inputs[\"input_ids\"]])\n" +
                "        token_type_ids = paddle.to_tensor([encoded_inputs[\"token_type_ids\"]])\n" +
                "\n" +
                "        logits = cls_model(input_ids, token_type_ids=token_type_ids)\n" +
                "        prediction = logits.argmax(axis=1).numpy()[0]\n" +
                "\n" +
                "        result = {\"aspect\": aspect, \"opinions\": opinion_words, \"sentiment\": cls_id2label[prediction]}\n" +
                "        results.append(result)\n" +
                "\n" +
                "    # print results\n" +
                "    format_print(results)\n" +
                "\n" +
                "    # 返回预测结果\n" +
                "    return results\n" +
                "\n" +
                "# 批量情感分析预测函数\n" +
                "def batchPredict(data, ext_model, cls_model, tokenizer, ext_id2label, cls_id2label, max_length=512):\n" +
                "\n" +
                "    ext_model.eval()\n" +
                "    cls_model.eval()\n" +
                "\n" +
                "    analysisResults = []\n" +
                "\n" +
                "    # 针对批量文本逐条处理\n" +
                "    for input_text in data:\n" +
                "        # processing input text\n" +
                "        encoded_inputs = tokenizer(list(input_text), is_split_into_words=True, max_length=max_length,)\n" +
                "        input_ids = paddle.to_tensor([encoded_inputs[\"input_ids\"]])\n" +
                "        token_type_ids = paddle.to_tensor([encoded_inputs[\"token_type_ids\"]])\n" +
                "\n" +
                "        # extract aspect and opinion words\n" +
                "        logits = ext_model(input_ids, token_type_ids=token_type_ids)\n" +
                "        predictions = logits.argmax(axis=2).numpy()[0]\n" +
                "        tag_seq = [ext_id2label[idx] for idx in predictions][1:-1]\n" +
                "        aps = decoding(input_text, tag_seq)\n" +
                "\n" +
                "        # predict sentiment for aspect with cls_model\n" +
                "        results = []\n" +
                "        for ap in aps:\n" +
                "            aspect = ap[0]\n" +
                "            opinion_words = list(set(ap[1:]))\n" +
                "            aspect_text = concate_aspect_and_opinion(input_text, aspect, opinion_words)\n" +
                "\n" +
                "            encoded_inputs = tokenizer(aspect_text, text_pair=input_text, max_length=max_length, return_length=True)\n" +
                "            input_ids = paddle.to_tensor([encoded_inputs[\"input_ids\"]])\n" +
                "            token_type_ids = paddle.to_tensor([encoded_inputs[\"token_type_ids\"]])\n" +
                "\n" +
                "            logits = cls_model(input_ids, token_type_ids=token_type_ids)\n" +
                "            prediction = logits.argmax(axis=1).numpy()[0]\n" +
                "\n" +
                "            result = {\"属性\": aspect, \"观点\": opinion_words, \"情感倾向\": cls_id2label[prediction]}\n" +
                "            results.append(result)\n" +
                "        singleResult = {\"text\": input_text, \"result\": str(results)}\n" +
                "        analysisResults.append(singleResult)\n" +
                "\n" +
                "    # 返回预测结果 list形式\n" +
                "    return analysisResults\n" +
                "\n" +
                "\n" +
                "label_ext_path = \"./label_ext.dict\"\n" +
                "label_cls_path = \"./label_cls.dict\"\n" +
                "# 加载PaddleNLP开源的基于全量数据训练好的评论观点抽取模型和属性级情感分类模型\n" +
                "ext_model_path = \"./model/best_ext.pdparams\"\n" +
                "cls_model_path = \"./model/best_cls.pdparams\"\n" +
                "\n" +
                "# load dict\n" +
                "model_name = \"skep_ernie_1.0_large_ch\"\n" +
                "ext_label2id, ext_id2label = data_ext.load_dict(label_ext_path)\n" +
                "cls_label2id, cls_id2label = data_cls.load_dict(label_cls_path)\n" +
                "tokenizer = SkepTokenizer.from_pretrained(model_name)\n" +
                "print(\"label dict loaded.\")\n" +
                "\n" +
                "# load ext model 观点抽取模型\n" +
                "ext_state_dict = paddle.load(ext_model_path)\n" +
                "ext_skep = SkepModel.from_pretrained(model_name)\n" +
                "ext_model = SkepForTokenClassification(ext_skep, num_classes=len(ext_label2id))\n" +
                "ext_model.load_dict(ext_state_dict)\n" +
                "print(\"extraction model loaded.\")\n" +
                "\n" +
                "# load cls model 属性级情感分析模型\n" +
                "cls_state_dict = paddle.load(cls_model_path)\n" +
                "cls_skep = SkepModel.from_pretrained(model_name)\n" +
                "cls_model = SkepForSequenceClassification(cls_skep, num_classes=len(cls_label2id))\n" +
                "cls_model.load_dict(cls_state_dict)\n" +
                "print(\"classification model loaded.\")\n" +
                "\n" +
                "# 单条文本情感分析\n" +
                "max_length = 512\n" +
                "input_text = '经济实惠、动力不错、油耗低'\n" +
                "predict(input_text, ext_model, cls_model, tokenizer, ext_id2label, cls_id2label, max_length=max_length)";
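
        // Note: testText is spliced into the script between single quotes, so input containing
        // a single quote (or a newline) would break the generated Python; no sanitizing is done here.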
        // Replace the sample sentence in the script with the actual input text
        String updatedCode = code.replace("'经济实惠、动力不错、油耗低'", "'" + testText + "'");

        //System.out.println(updatedCode);
        try {
            String s = IdUtil.simpleUUID();
            String tempPythonFile = "/usr/local/tianzeProject/digitalMarketing/comment/backend/" + s + ".py";

            File file1 = new File(tempPythonFile);

            // Make sure the parent directory exists
            File parentDir = file1.getParentFile();
            if (!parentDir.exists()) {
                System.out.println("Parent directory does not exist. Creating it.");
                if (!parentDir.mkdirs()) {
                    System.out.println("Failed to create directories.");
                    return new ResultEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
                }
            }

            // Create the file and write the generated script into it
            if (!file1.exists()) {
                try {
                    boolean fileCreated = file1.createNewFile();
                    if (fileCreated) {
                        System.out.println("File created successfully: " + tempPythonFile);
                    } else {
                        System.out.println("File already exists: " + tempPythonFile);
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                    return new ResultEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
                }
            }

            try (PrintWriter out = new PrintWriter(file1)) {
                out.println(updatedCode);
            }
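
            // Passing the host path straight to "docker exec" assumes that
            // /usr/local/tianzeProject/digitalMarketing/comment/backend/ is mounted into the
            // pyexe container at the same path.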
            // Run the script with python inside the pyexe Docker container
            String[] command = {"docker", "exec", "pyexe", "python", tempPythonFile};

            Process process = Runtime.getRuntime().exec(command);

            // Grab the process's stdout and stderr
            BufferedReader inputStream = new BufferedReader(new InputStreamReader(process.getInputStream()));
            BufferedReader errorStream = new BufferedReader(new InputStreamReader(process.getErrorStream()));

            // Read everything the Python script printed to stdout
            StringBuilder output = new StringBuilder();
            String line;
            while ((line = inputStream.readLine()) != null) {
                output.append(line).append("\n");
            }

            // Read any error output from the Python script
            StringBuilder errors = new StringBuilder();
            while ((line = errorStream.readLine()) != null) {
                errors.append(line).append("\n");
            }

            // Wait for the process to finish
            int exitCode = process.waitFor();
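
            // The parsing below assumes the script's format_print emits a single line shaped like
            //   aspect: xxx, opinions: ['yyy'], sentiment: zzz
            // Output with several aspects or several opinion words will not split cleanly on commas.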
            if (exitCode == 0) {
                List<CommentDTO> comments = new ArrayList<>();

                // Turn the captured stdout into a single string and pull out the fields
                String strings = output.toString().trim();
                String[] split = strings.split(",");
                String aspect = split[0].split(": ")[1];
                String opinionsStr = split[1].split(": ")[1];
                String sentiment = split[2].split(": ")[1];

                // Convert the opinions string (a Python list literal) into a List<String>
                opinionsStr = opinionsStr.replace("'", "").replace("[", "").replace("]", "").trim();
                List<String> opinions = Arrays.asList(opinionsStr.split(", "));

                comments.add(new CommentDTO(aspect, opinions, sentiment));

                // System.out.println("Python code output:\n" + output.toString());
                return new ResultEntity(HttpStatus.OK, comments);
            } else {
                // System.err.println("Error executing Python code:\n" + errors.toString());
                return new ResultEntity(HttpStatus.INTERNAL_SERVER_ERROR, errors.toString());
            }
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
            return new ResultEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
        }
    }

}