Compare commits
2 Commits
5f5521b065...fc623db0ca
| Author | SHA1 | Date |
|---|---|---|
| | fc623db0ca | 7 months ago |
| | 48588b59aa | 7 months ago |
@@ -1,121 +1,44 @@
 package com.sztzjy.marketing.util.algorithm;

-import com.sztzjy.marketing.util.BigDecimalUtils;
-
-import java.text.DecimalFormat;
-
 public class LogisticRegression {
-    private double theta0;       // intercept term
-    private double theta1;       // slope
-    private double learningRate; // learning rate
-
-    private int currentEpoch;    // declare the currentEpoch variable
-
-    public LogisticRegression(double learningRate) {
-        this.learningRate = learningRate;
-    }
-
-    public LogisticRegression(double learningRate, double initialTheta0, double initialTheta1) {
-        this.learningRate = learningRate;
-        this.theta0 = initialTheta0;
-        this.theta1 = initialTheta1;
-        this.currentEpoch = 0; // initialize currentEpoch in the constructor
-    }
-
-    // Method to get the intercept term
-    public double getIntercept() {
-        return theta0;
-    }
-
-    // Method to get the coefficient of the first feature
-    public double getCoefficient() {
-        return theta1;
-    }
-
-    // Sigmoid function
-    private double sigmoid(double z) {
-        return 1 / (1 + Math.exp(-z));
-    }
-
-    // Prediction function
-    public double predict(double x, double slopes, double intercept) {
-        BigDecimalUtils bigDecimalUtils = new BigDecimalUtils();
-        Double mul = bigDecimalUtils.mul(intercept, x);
-        return bigDecimalUtils.add(slopes, mul);
-    }
-
-    private double dotProduct(double[] a, double[] b) {
-        double result = 0.0;
-        for (int i = 0; i < a.length; i++) {
-            result += a[i] * b[i];
-        }
-        return result;
-    }
-
-//
-//    // Gradient-descent parameter update
-//    public void updateParameters(double x, double y) {
-//        double h = predict(x);
-//        double error = y - h;
-//        theta1 += learningRate * error * h * (1 - h) * x;
-//        theta0 += learningRate * error * h * (1 - h);
-//    }
-
-    private double exponentialDecayLearningRate(double currentEpoch, int totalEpochs) {
-        return learningRate * Math.exp(-(currentEpoch / totalEpochs));
-    }
-
-    // Train the model
-    public void train(double[][] data, double[] labels, int epochs) {
-        double learningRateCurrentEpoch = exponentialDecayLearningRate(currentEpoch, epochs);
-        for (int epoch = 0; epoch < epochs; epoch++) {
-            for (int i = 0; i < data.length; i++) {
-                double x = data[i][0];
-                double y = labels[i];
-                double h = sigmoid(theta0 + theta1 * x);
-                double error = y - h;
-                theta1 += learningRateCurrentEpoch * error * h * (1 - h) * x;
-                theta0 += learningRateCurrentEpoch * error * h * (1 - h);
-            }
-            // Print statements can be added here to monitor training
-            // System.out.println("Epoch: " + (epoch + 1) + ", Loss: " + calculateLoss(data, labels));
-        }
-    }
-
-    public double calculateLoss(double[][] data, double[] labels) {
-        double loss = 0;
-        for (int i = 0; i < data.length; i++) {
-            double x = data[i][0];
-            double y = labels[i];
-            double h = sigmoid(theta0 + theta1 * x);
-            loss += -y * Math.log(h) - (1 - y) * Math.log(1 - h);
-        }
-        return loss / data.length;
-    }
-
-//    public static void main(String[] args) {
-//        LogisticRegression model = new LogisticRegression(0.1); // create a logistic regression model instance
-//
-////        double[][] data = {{1}, {2}, {3}, {4}, {5}};
-////        double[] labels = {0, 0, 1, 1, 1}; // assume this is a binary classification problem
-//
-//        double[][] data = {{1, 19}, {1, 21}, {2, 20}, {2, 23}, {2, 31}};
-//        double[] labels = {15, 15, 16, 16, 17};
-//
-//        model.train(data, labels, 1000); // train the model
-//
-//        DecimalFormat df = new DecimalFormat("#.0");
-//
-//        // Get and print the intercept and the coefficient
-//        double intercept = model.getIntercept();
-//        double coefficient = model.getCoefficient();
-//        System.out.println("Constant term: " + df.format(intercept));
-//        System.out.println("Coefficient: " + df.format(coefficient));
-//
-//        double prediction = model.predict(2);
-//        System.out.println("Prediction for x=3.5: " + df.format(prediction));
-//    }
+    private double[] x;
+    private double[] y;
+
+    public LogisticRegression(double[] x, double[] y) {
+        this.x = x;
+        this.y = y;
+    }
+
+    // Compute the linear-regression parameters
+    public double[] fit() {
+        if (x.length != y.length || x.length == 0) {
+            throw new IllegalArgumentException("Invalid input data");
+        }
+
+        int n = x.length;
+        double sumX = 0;
+        double sumY = 0;
+        double sumXY = 0;
+        double sumXX = 0;
+
+        // Accumulate the sums
+        for (int i = 0; i < n; i++) {
+            sumX += x[i];
+            sumY += y[i];
+            sumXY += x[i] * y[i];
+            sumXX += x[i] * x[i];
+        }
+
+        // Compute the slope and the intercept
+        double slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX);
+        double intercept = (sumY - slope * sumX) / n;
+
+        return new double[] { intercept, slope };
+    }
 }
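Despite the class name, the version at the head of this compare (fc623db0ca) fits a simple linear regression rather than a logistic one: `fit()` evaluates the ordinary-least-squares closed form

$$\text{slope} = \frac{n\sum_i x_i y_i - \sum_i x_i \sum_i y_i}{n\sum_i x_i^2 - \bigl(\sum_i x_i\bigr)^2}, \qquad \text{intercept} = \frac{\sum_i y_i - \text{slope}\cdot\sum_i x_i}{n}.$$

A minimal usage sketch of the new API follows; the `FitExample` class and the sample arrays are hypothetical and only illustrate the call pattern:

```java
package com.sztzjy.marketing.util.algorithm;

public class FitExample {
    public static void main(String[] args) {
        // Hypothetical sample data: y is roughly 2x + 1
        double[] x = {1, 2, 3, 4, 5};
        double[] y = {3.1, 4.9, 7.2, 9.0, 11.1};

        // fit() returns { intercept, slope }
        double[] params = new LogisticRegression(x, y).fit();
        double intercept = params[0];
        double slope = params[1];
        System.out.println("intercept = " + intercept + ", slope = " + slope);

        // Predict at a new point with the fitted line
        double xNew = 6;
        System.out.println("prediction at x = 6: " + (intercept + slope * xNew));
    }
}
```

Returning `{ intercept, slope }` as a bare two-element array keeps the class dependency-free, though callers must remember the element order.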
File diff suppressed because it is too large
File diff suppressed because it is too large