如何用 C 语言计算皮尔逊相关系数

简介

在本实验中，我们将学习如何用 C 语言计算皮尔逊相关系数。本实验涵盖三个主要步骤：读取成对的 (x, y) 数据、计算必要的总和，以及使用公式计算相关系数。我们将创建一个 C 程序，允许用户输入数据点，然后程序将执行相关性分析并输出结果。

本实验提供了详细的分步指南，首先实现数据输入功能，接着计算相关公式所需的总和，最后打印相关系数。

读取成对的 (x, y) 数据

在这一步中，我们将学习如何读取成对的 (x, y) 数据，以便在 C 语言中计算皮尔逊相关系数。我们将创建一个程序，允许用户输入成对的数值数据并存储起来以供进一步分析。

首先，为我们的数据输入功能创建一个 C 源文件：

cd ~/project
nano correlation_input.c

现在，将以下代码添加到文件中：

#include <stdio.h>
#define MAX_POINTS 100

/*
 * Reads up to MAX_POINTS (x, y) pairs from standard input and echoes them
 * back so the user can confirm what was entered.
 *
 * Returns 0 on success, or 1 when the point count or a coordinate pair
 * cannot be parsed, or the count is outside 1..MAX_POINTS.
 */
int main(void) {
    double x[MAX_POINTS], y[MAX_POINTS];
    int n;

    printf("Enter the number of data points (max %d): ", MAX_POINTS);
    /* A count above MAX_POINTS would write past the end of x[] and y[]. */
    if (scanf("%d", &n) != 1 || n < 1 || n > MAX_POINTS) {
        fprintf(stderr, "Invalid number of data points (expected 1-%d).\n",
                MAX_POINTS);
        return 1;
    }

    printf("Enter x and y coordinates:\n");
    for (int i = 0; i < n; i++) {
        printf("Point %d (x y): ", i + 1);
        /* scanf reports how many values it stored; anything but 2 means
         * x[i] and/or y[i] was left unset. */
        if (scanf("%lf %lf", &x[i], &y[i]) != 2) {
            fprintf(stderr, "Invalid input for point %d.\n", i + 1);
            return 1;
        }
    }

    printf("\nData Points Entered:\n");
    for (int i = 0; i < n; i++) {
        printf("Point %d: (%.2f, %.2f)\n", i + 1, x[i], y[i]);
    }

    return 0;
}

编译程序：

gcc -o correlation_input correlation_input.c

运行程序并输入一些示例数据：

./correlation_input

示例输出：

Enter the number of data points (max 100): 5
Enter x and y coordinates:
Point 1 (x y): 1 2
Point 2 (x y): 2 4
Point 3 (x y): 3 5
Point 4 (x y): 4 4
Point 5 (x y): 5 5

Data Points Entered:
Point 1: (1.00, 2.00)
Point 2: (2.00, 4.00)
Point 3: (3.00, 5.00)
Point 4: (4.00, 4.00)
Point 5: (5.00, 5.00)

下面我们来分析一下这段代码：

我们定义了最大数据点数 (MAX_POINTS)，它决定了数组的固定容量；输入的数据点数不得超过该值，否则会发生数组越界。
程序提示用户输入数据点数。
然后它允许用户为每个点输入 x 和 y 坐标。
最后，它打印出输入的数据点以确认输入。

计算总和并使用相关公式

在这一步中，我们将扩展之前的程序，求出皮尔逊相关系数公式所需的各项总和，并据此计算相关系数。我们将修改 correlation_input.c 文件，使其包含相关公式的计算。

打开之前的文件：

cd ~/project
nano correlation_input.c

用以下实现更新代码：

#include <stdio.h>
#include <math.h>
#define MAX_POINTS 100

double calculatePearsonCorrelation(double x[], double y[], int n) {
    double sum_x = 0, sum_y = 0, sum_xy = 0;
    double sum_x_squared = 0, sum_y_squared = 0;

    // 计算必要的总和
    for (int i = 0; i < n; i++) {
        sum_x += x[i];
        sum_y += y[i];
        sum_xy += x[i] * y[i];
        sum_x_squared += x[i] * x[i];
        sum_y_squared += y[i] * y[i];
    }

    // 皮尔逊相关系数公式
    double numerator = n * sum_xy - sum_x * sum_y;
    double denominator = sqrt((n * sum_x_squared - sum_x * sum_x) *
                               (n * sum_y_squared - sum_y * sum_y));

    return numerator / denominator;
}

/*
 * Reads (x, y) pairs from standard input, echoes them, and prints their
 * Pearson correlation coefficient.
 *
 * Returns 0 on success, or 1 when the point count or a coordinate pair
 * cannot be parsed, or the count is outside 2..MAX_POINTS.
 */
int main(void) {
    double x[MAX_POINTS], y[MAX_POINTS];
    int n;

    printf("Enter the number of data points (max %d): ", MAX_POINTS);
    /* More than MAX_POINTS would overrun x[] and y[]; fewer than two points
     * cannot define a correlation. */
    if (scanf("%d", &n) != 1 || n < 2 || n > MAX_POINTS) {
        fprintf(stderr, "Invalid number of data points (expected 2-%d).\n",
                MAX_POINTS);
        return 1;
    }

    printf("Enter x and y coordinates:\n");
    for (int i = 0; i < n; i++) {
        printf("Point %d (x y): ", i + 1);
        /* scanf reports how many values it stored; anything but 2 means
         * x[i] and/or y[i] was left unset. */
        if (scanf("%lf %lf", &x[i], &y[i]) != 2) {
            fprintf(stderr, "Invalid input for point %d.\n", i + 1);
            return 1;
        }
    }

    double correlation = calculatePearsonCorrelation(x, y, n);

    printf("\nData Points Entered:\n");
    for (int i = 0; i < n; i++) {
        printf("Point %d: (%.2f, %.2f)\n", i + 1, x[i], y[i]);
    }

    printf("\nPearson Correlation Coefficient: %.4f\n", correlation);

    return 0;
}

使用数学库编译程序：

gcc -o correlation_input correlation_input.c -lm

使用示例数据运行程序：

./correlation_input

示例输出：

Enter the number of data points (max 100): 5
Enter x and y coordinates:
Point 1 (x y): 1 2
Point 2 (x y): 2 4
Point 3 (x y): 3 5
Point 4 (x y): 4 4
Point 5 (x y): 5 5

Data Points Entered:
Point 1: (1.00, 2.00)
Point 2: (2.00, 4.00)
Point 3: (3.00, 5.00)
Point 4: (4.00, 4.00)
Point 5: (5.00, 5.00)

Pearson Correlation Coefficient: 0.7746

关于皮尔逊相关计算的要点：

我们计算必要的总和：x、y、xy、x²、y²
应用皮尔逊相关系数公式
使用 math.h 中的 sqrt() 进行计算
返回介于 -1 和 1 之间的相关系数

打印相关系数

在这最后一步中，我们将改进程序，以对皮尔逊相关系数提供全面的解释，并创建一个更用户友好的输出。

打开之前的文件：

cd ~/project
nano correlation_input.c

用以下实现更新代码：

#include <stdio.h>
#include <math.h>
#define MAX_POINTS 100

double calculatePearsonCorrelation(double x[], double y[], int n) {
    double sum_x = 0, sum_y = 0, sum_xy = 0;
    double sum_x_squared = 0, sum_y_squared = 0;

    for (int i = 0; i < n; i++) {
        sum_x += x[i];
        sum_y += y[i];
        sum_xy += x[i] * y[i];
        sum_x_squared += x[i] * x[i];
        sum_y_squared += y[i] * y[i];
    }

    double numerator = n * sum_xy - sum_x * sum_y;
    double denominator = sqrt((n * sum_x_squared - sum_x * sum_x) *
                               (n * sum_y_squared - sum_y * sum_y));

    return numerator / denominator;
}

/*
 * Prints the coefficient followed by a qualitative label for its strength
 * and direction.
 *
 * correlation: Pearson coefficient. A non-finite value (NaN or +/-inf) is
 *              reported as undefined instead of being classified.
 *
 * Labels: > 0.8 strong positive, > 0.5 moderate positive, > 0.3 weak
 * positive, > -0.3 no linear correlation, > -0.5 weak negative,
 * > -0.8 moderate negative, otherwise strong negative.
 */
void interpretCorrelation(double correlation) {
    printf("\n相关系数解释：\n");

    /* Every ordered comparison with NaN is false, so without this guard a
     * NaN would reach the final else and be labelled as strong negative. */
    if (!isfinite(correlation)) {
        printf("相关值：未定义\n");
        printf("无法解释相关性（数据点不足，或 x、y 的方差为零）\n");
        return;
    }

    printf("相关值：%.4f\n", correlation);

    if (correlation > 0.8) {
        printf("强正相关\n");
    } else if (correlation > 0.5) {
        printf("中度正相关\n");
    } else if (correlation > 0.3) {
        printf("弱正相关\n");
    } else if (correlation > -0.3) {
        printf("无线性相关\n");
    } else if (correlation > -0.5) {
        printf("弱负相关\n");
    } else if (correlation > -0.8) {
        printf("中度负相关\n");
    } else {
        printf("强负相关\n");
    }
}

/*
 * Reads (x, y) pairs from standard input, echoes them, and prints the
 * Pearson correlation coefficient together with its interpretation.
 *
 * Returns 0 on success, or 1 when the point count or a coordinate pair
 * cannot be parsed, or the count is outside 2..MAX_POINTS.
 */
int main(void) {
    double x[MAX_POINTS], y[MAX_POINTS];
    int n;

    printf("皮尔逊相关系数计算器\n");
    printf("----------------------------------------\n");
    printf("输入数据点数量（最大 %d）：", MAX_POINTS);
    /* More than MAX_POINTS would overrun x[] and y[]; fewer than two points
     * cannot define a correlation. */
    if (scanf("%d", &n) != 1 || n < 2 || n > MAX_POINTS) {
        fprintf(stderr, "数据点数量无效（应为 2 到 %d 之间的整数）。\n",
                MAX_POINTS);
        return 1;
    }

    printf("输入 x 和 y 坐标：\n");
    for (int i = 0; i < n; i++) {
        printf("点 %d (x y)：", i + 1);
        /* scanf reports how many values it stored; anything but 2 means
         * x[i] and/or y[i] was left unset. */
        if (scanf("%lf %lf", &x[i], &y[i]) != 2) {
            fprintf(stderr, "点 %d 的输入无效。\n", i + 1);
            return 1;
        }
    }

    double correlation = calculatePearsonCorrelation(x, y, n);

    printf("\n输入的数据点：\n");
    for (int i = 0; i < n; i++) {
        printf("点 %d：(%.2f, %.2f)\n", i + 1, x[i], y[i]);
    }

    interpretCorrelation(correlation);

    return 0;
}

编译程序：

gcc -o correlation_calculator correlation_input.c -lm

使用示例数据运行程序：

./correlation_calculator

示例输出：

皮尔逊相关系数计算器
----------------------------------------
输入数据点数量（最大 100）：5
输入 x 和 y 坐标：
点 1 (x y)：1 2
点 2 (x y)：2 4
点 3 (x y)：3 5
点 4 (x y)：4 4
点 5 (x y)：5 5

输入的数据点：
点 1：(1.00, 2.00)
点 2：(2.00, 4.00)
点 3：(3.00, 5.00)
点 4：(4.00, 4.00)
点 5：(5.00, 5.00)

相关系数解释：
相关值：0.7746
中度正相关

主要改进：

添加了interpretCorrelation()函数
提供了相关强度的详细解释
将相关性分类为不同级别
通过标题和清晰的输出来增强用户界面

总结

在本实验中，我们学习了如何读取成对的 (x, y) 数据，以便在 C 语言中计算皮尔逊相关系数。我们创建了一个程序，允许用户输入成对的数值数据并存储起来以供进一步分析。我们还扩展了该程序，先求出公式所需的各项总和，再据此计算皮尔逊相关系数，并对结果给出文字解释。

本实验涵盖的关键步骤包括读取成对的 (x, y) 数据、计算相关公式所需的总和，以及打印最终的相关系数。通过遵循这些步骤，你可以在自己的 C 程序中实现皮尔逊相关系数的计算。