|
|
|
@ -2,15 +2,15 @@ package com.sztzjy.marketing.controller.stu;
|
|
|
|
|
|
|
|
|
|
import cn.hutool.core.util.IdUtil;
|
|
|
|
|
import com.alibaba.fastjson.JSON;
|
|
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
|
|
|
import com.alibaba.fastjson.JSONObject;
|
|
|
|
|
import com.sztzjy.marketing.annotation.AnonymousAccess;
|
|
|
|
|
import com.sztzjy.marketing.entity.dto.CommentDTO;
|
|
|
|
|
import com.sztzjy.marketing.entity.dto.NewDescriptiveStatisticsDTO;
|
|
|
|
|
import com.sztzjy.marketing.entity.dto.SentimentAnalyDTO;
|
|
|
|
|
import com.sztzjy.marketing.qianfan.util.Json;
|
|
|
|
|
import com.sztzjy.marketing.util.ResultEntity;
|
|
|
|
|
import io.swagger.annotations.Api;
|
|
|
|
|
import io.swagger.annotations.ApiOperation;
|
|
|
|
|
import io.swagger.annotations.ApiParam;
|
|
|
|
|
import org.springframework.http.HttpStatus;
|
|
|
|
|
import org.springframework.web.bind.annotation.PostMapping;
|
|
|
|
|
import org.springframework.web.bind.annotation.RequestBody;
|
|
|
|
@ -18,7 +18,10 @@ import org.springframework.web.bind.annotation.RequestMapping;
|
|
|
|
|
import org.springframework.web.bind.annotation.RestController;
|
|
|
|
|
|
|
|
|
|
import java.io.*;
|
|
|
|
|
import java.util.*;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.Map;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@RestController
|
|
|
|
@ -371,5 +374,198 @@ public class StuPythonController {
|
|
|
|
|
return new ResultEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
@PostMapping("/descriptiveStatistics")
|
|
|
|
|
@ApiOperation("描述性统计分析")
|
|
|
|
|
@AnonymousAccess
|
|
|
|
|
public ResultEntity descriptiveStatistics(@ApiParam("分析的数据") @RequestBody JSONObject data, @ApiParam("计算量") @RequestBody JSONObject compute) {
|
|
|
|
|
// System.out.println(text);
|
|
|
|
|
String analyzedData = data.getString("data");
|
|
|
|
|
String computational = data.getString("compute");
|
|
|
|
|
String code = "# -*- coding: utf-8 -*-\n" +
|
|
|
|
|
"import pandas as pd\n" +
|
|
|
|
|
"import numpy as np\n" +
|
|
|
|
|
"from scipy import stats\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
"# 生成一些示例数据,故意在某些位置插入 NaN\n" +
|
|
|
|
|
"data = {\n" +
|
|
|
|
|
" 'age': [25, 32, None, 51, 23, 34, 36, 42, None, 40]\n" +
|
|
|
|
|
"}\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
"# 创建 DataFrame\n" +
|
|
|
|
|
"df = pd.DataFrame(data)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
"# 计算描述性统计的函数\n" +
|
|
|
|
|
"def descriptive_statistics(valid_data, stats_to_compute):\n" +
|
|
|
|
|
" stats_summary = {}\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '平均数' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['平均数'] = round(valid_data.mean(), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '中位数' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['中位数'] = round(valid_data.median(), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '众数' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['众数'] = round(valid_data.mode()[0], 1) # 取众数的第一个值\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '标准差' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['标准差'] = round(valid_data.std(ddof=0), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '方差' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['方差'] = round(valid_data.var(ddof=0), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '标准误差' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['标准误差'] = round(valid_data.std(ddof=0) / np.sqrt(len(valid_data)), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '峰度' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['峰度'] = round(stats.kurtosis(valid_data, fisher=True), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '偏度' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['偏度'] = round(stats.skew(valid_data), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '最大值' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['最大值'] = round(valid_data.max(), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '最小值' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['最小值'] = round(valid_data.min(), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" if '求和' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['求和'] = round(valid_data.sum(), 1)\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" # 仅在请求时计算观测数\n" +
|
|
|
|
|
" if '观测数' in stats_to_compute:\n" +
|
|
|
|
|
" stats_summary['观测数'] = valid_data.count() # 观测数不需要四舍五入\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
" return stats_summary\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
"# 根据传入的列和需要计算的统计量进行描述性统计分析\n" +
|
|
|
|
|
"def analyze_columns(df, stats_to_compute):\n" +
|
|
|
|
|
" for column in df.columns: # 直接遍历 DataFrame 中的所有列\n" +
|
|
|
|
|
" valid_data = df[column].dropna() # 去掉空值\n" +
|
|
|
|
|
" if len(valid_data) == 0:\n" +
|
|
|
|
|
" print(f\"{column} 列没有有效数据,跳过该列的描述性统计分析。\\n\")\n" +
|
|
|
|
|
" else:\n" +
|
|
|
|
|
" print(f\"{column} 的描述性统计:\")\n" +
|
|
|
|
|
" stats_summary = descriptive_statistics(valid_data, stats_to_compute) # 计算有效数据的描述性统计\n" +
|
|
|
|
|
" for stat_name, value in stats_summary.items():\n" +
|
|
|
|
|
" print(f\"{stat_name}: {value}\")\n" +
|
|
|
|
|
" print(\"\\n\")\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
"\n" +
|
|
|
|
|
"# 计算的统计量\n" +
|
|
|
|
|
"stats_to_compute = ['平均数', '中位数', '观测数'] # 可以根据需要修改要计算的统计量\n" +
|
|
|
|
|
"analyze_columns(df, stats_to_compute)";
|
|
|
|
|
|
|
|
|
|
// 替换代码中的 data和compute 为实际的 analyzedData和computational
|
|
|
|
|
String updatedCode = code.replace(" 'age': [25, 32, None, 51, 23, 34, 36, 42, None, 40]", "'" + analyzedData + "'")
|
|
|
|
|
.replace("'平均数', '中位数', '观测数'",computational);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//System.out.println(updatedCode);
|
|
|
|
|
try {
|
|
|
|
|
String s = IdUtil.simpleUUID();
|
|
|
|
|
String tempPythonFile = "/usr/local/tianzeProject/digitalMarketing/cnsenti/cnsenti/" + s + ".py";
|
|
|
|
|
|
|
|
|
|
File file1 = new File(tempPythonFile);
|
|
|
|
|
|
|
|
|
|
// 确保父目录存在
|
|
|
|
|
File parentDir = file1.getParentFile();
|
|
|
|
|
if (!parentDir.exists()) {
|
|
|
|
|
System.out.println("Parent directory does not exist. Creating it.");
|
|
|
|
|
if (!parentDir.mkdirs()) {
|
|
|
|
|
System.out.println("Failed to create directories.");
|
|
|
|
|
return new ResultEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 创建文件并写入内容
|
|
|
|
|
if (!file1.exists()) {
|
|
|
|
|
try {
|
|
|
|
|
boolean fileCreated = file1.createNewFile();
|
|
|
|
|
if (fileCreated) {
|
|
|
|
|
System.out.println("File created successfully: " + tempPythonFile);
|
|
|
|
|
} else {
|
|
|
|
|
System.out.println("File already exists: " + tempPythonFile);
|
|
|
|
|
}
|
|
|
|
|
} catch (IOException e) {
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
return new ResultEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
try (PrintWriter out = new PrintWriter(file1)) {
|
|
|
|
|
out.println(updatedCode);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// 确认 Docker 命令
|
|
|
|
|
String[] command = {"docker", "exec", "pyexe", "python", tempPythonFile};
|
|
|
|
|
|
|
|
|
|
Process process = Runtime.getRuntime().exec(command);
|
|
|
|
|
|
|
|
|
|
// 获取进程的输入流
|
|
|
|
|
BufferedReader inputStream = new BufferedReader(new InputStreamReader(process.getInputStream()));
|
|
|
|
|
BufferedReader errorStream = new BufferedReader(new InputStreamReader(process.getErrorStream()));
|
|
|
|
|
|
|
|
|
|
// 读取 Python 代码的输出
|
|
|
|
|
StringBuilder output = new StringBuilder();
|
|
|
|
|
String line;
|
|
|
|
|
while ((line = inputStream.readLine()) != null) {
|
|
|
|
|
output.append(line).append("\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 读取 Python 代码的错误信息
|
|
|
|
|
StringBuilder errors = new StringBuilder();
|
|
|
|
|
while ((line = errorStream.readLine()) != null) {
|
|
|
|
|
errors.append(line).append("\n");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 等待进程执行完成
|
|
|
|
|
int exitCode = process.waitFor();
|
|
|
|
|
if (exitCode == 0) {
|
|
|
|
|
String outputStr = output.toString().trim();
|
|
|
|
|
// 解析 JSON 输出
|
|
|
|
|
Map<String, Map<String, Object>> jsonResults = JSON.parseObject(outputStr, Map.class);
|
|
|
|
|
|
|
|
|
|
List<NewDescriptiveStatisticsDTO> list=new ArrayList<>();
|
|
|
|
|
// 创建并填充 NewDescriptiveStatisticsDTO 对象
|
|
|
|
|
for (Map.Entry<String, Map<String, Object>> entry : jsonResults.entrySet()) {
|
|
|
|
|
NewDescriptiveStatisticsDTO statisticsDTO = new NewDescriptiveStatisticsDTO();
|
|
|
|
|
|
|
|
|
|
Map<String, Object> stats = entry.getValue();
|
|
|
|
|
statisticsDTO.setAverage(((Number) stats.get("平均数")).doubleValue());
|
|
|
|
|
statisticsDTO.setMedian(((Number) stats.get("中位数")).doubleValue());
|
|
|
|
|
statisticsDTO.setMode(((Number) stats.get("众数")).doubleValue());
|
|
|
|
|
statisticsDTO.setStandardDeviation(((Number) stats.get("标准差")).doubleValue());
|
|
|
|
|
statisticsDTO.setVariance(((Number) stats.get("方差")).doubleValue());
|
|
|
|
|
statisticsDTO.setStandardError(((Number) stats.get("标准误差")).doubleValue());
|
|
|
|
|
statisticsDTO.setKurtosis(((Number) stats.get("峰度")).doubleValue());
|
|
|
|
|
statisticsDTO.setSkewness(((Number) stats.get("偏度")).doubleValue());
|
|
|
|
|
statisticsDTO.setMax(((Number) stats.get("最大值")).doubleValue());
|
|
|
|
|
statisticsDTO.setMin(((Number) stats.get("最小值")).doubleValue());
|
|
|
|
|
statisticsDTO.setSummation(((Number) stats.get("求和")).doubleValue());
|
|
|
|
|
statisticsDTO.setObservations(((Number) stats.get("观测数")).intValue());
|
|
|
|
|
|
|
|
|
|
// 输出或者处理 statisticsDTO 对象
|
|
|
|
|
System.out.println("Column: " + entry.getKey());
|
|
|
|
|
System.out.println(statisticsDTO);
|
|
|
|
|
|
|
|
|
|
list.add(statisticsDTO);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// System.out.println("Python code output:\n" + output.toString());
|
|
|
|
|
return new ResultEntity(HttpStatus.OK,list);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
// System.err.println("Error executing Python code:\n" + errors.toString());
|
|
|
|
|
return new ResultEntity(HttpStatus.INTERNAL_SERVER_ERROR, errors.toString());
|
|
|
|
|
}
|
|
|
|
|
} catch (IOException | InterruptedException e) {
|
|
|
|
|
e.printStackTrace();
|
|
|
|
|
return new ResultEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|