C 言語で線形回帰パラメータを計算する方法

はじめに

この実験では、C 言語を使用して線形回帰のパラメータ（傾き (m) と切片 (b)）を計算する方法を学びます。この実験では、(x,y) のデータポイントを読み取り、傾きと切片を計算し、y = mx + b の形式で線形回帰方程式を出力する手順を段階的に説明します。この実験は、広く使用されているプログラミング言語である C を用いた統計データ分析とモデリングの実践的なアプローチを提供します。

Skills Graph

%%{init: {'theme':'neutral'}}%%
flowchart RL
  c(("C")) -.-> c/UserInteractionGroup(["User Interaction"])
  c(("C")) -.-> c/BasicsGroup(["Basics"])
  c(("C")) -.-> c/CompoundTypesGroup(["Compound Types"])
  c(("C")) -.-> c/FunctionsGroup(["Functions"])
  c/BasicsGroup -.-> c/data_types("Data Types")
  c/CompoundTypesGroup -.-> c/arrays("Arrays")
  c/FunctionsGroup -.-> c/math_functions("Math Functions")
  c/UserInteractionGroup -.-> c/user_input("User Input")
  c/UserInteractionGroup -.-> c/output("Output")
  subgraph Lab Skills
    c/data_types -.-> lab-435150{{"C 言語で線形回帰パラメータを計算する"}}
    c/arrays -.-> lab-435150{{"C 言語で線形回帰パラメータを計算する"}}
    c/math_functions -.-> lab-435150{{"C 言語で線形回帰パラメータを計算する"}}
    c/user_input -.-> lab-435150{{"C 言語で線形回帰パラメータを計算する"}}
    c/output -.-> lab-435150{{"C 言語で線形回帰パラメータを計算する"}}
  end

(x,y) データポイントの読み取り

このステップでは、C 言語で線形回帰分析のための (x,y) データポイントを読み取る方法を学びます。複数のデータポイントの入力を可能にし、それらをさらなる計算のために保存するプログラムを作成します。

まず、データポイントの読み取りを実装する C ファイルを作成しましょう。

cd ~/project
nano linear_regression.c

次に、以下のコードをファイルに追加します。

#include <stdio.h>
#define MAX_POINTS 100

// One (x, y) data point; both coordinates are stored as doubles.
typedef struct DataPoint {
    double x;  // x coordinate
    double y;  // y coordinate
} DataPoint;

// Reads (x, y) pairs from standard input into a fixed-size array and then
// prints every stored pair.
//
// Reading stops when:
//   - the sentinel pair "-1 -1" is entered,
//   - MAX_POINTS pairs have been stored, or
//   - scanf() fails to convert two numbers (end of input or non-numeric text).
//
// Returns 0.
int main(void) {
    DataPoint points[MAX_POINTS];
    int num_points = 0;

    printf("Enter x and y coordinates (enter -1 -1 to finish):\n");

    while (num_points < MAX_POINTS) {
        double x, y;

        // scanf() returns the number of successful conversions. Anything
        // other than 2 means x and/or y were not written, so stop reading
        // instead of storing indeterminate values.
        if (scanf("%lf %lf", &x, &y) != 2) {
            break;
        }

        // (-1, -1) is reserved as the end-of-input marker and is therefore
        // never stored as a data point.
        if (x == -1 && y == -1) {
            break;
        }

        points[num_points].x = x;
        points[num_points].y = y;
        num_points++;
    }

    // Echo the stored points (numbered from 1) so the user can verify them.
    printf("\nData Points Entered:\n");
    for (int i = 0; i < num_points; i++) {
        printf("Point %d: (%.2f, %.2f)\n", i+1, points[i].x, points[i].y);
    }

    return 0;
}

プログラムをコンパイルします。

gcc -o linear_regression linear_regression.c

プログラムを実行し、いくつかのサンプルデータポイントを入力します。

./linear_regression

出力例:

Enter x and y coordinates (enter -1 -1 to finish):
1 2
2 4
3 5
4 4
5 5
-1 -1

Data Points Entered:
Point 1: (1.00, 2.00)
Point 2: (2.00, 4.00)
Point 3: (3.00, 5.00)
Point 4: (4.00, 4.00)
Point 5: (5.00, 5.00)

このコードの主要なコンポーネントを分解してみましょう。

DataPoint 構造体を定義して、x 座標と y 座標を保存します。
MAX_POINTS は、配列 points の範囲外への書き込み（バッファオーバーフロー）を防ぐために、保存できるデータポイントの数を制限します。
プログラムは while ループを使用して座標を読み取ります。
ユーザーはデータポイントを入力し、-1 -1 を入力することで入力を終了できます。
プログラムは入力されたすべてのデータポイントを出力して検証します。

傾き (m) と切片 (b) の計算

このステップでは、最小二乗法を使用して線形回帰の傾き (m) と切片 (b) を計算する方法を学びます。

まず、前の linear_regression.c ファイルを更新します。

cd ~/project
nano linear_regression.c

前のコードを以下の実装に置き換えます。

#include <stdio.h>
#include <math.h>
#define MAX_POINTS 100

// One (x, y) data point.
typedef struct {
    double x;
    double y;
} DataPoint;

// Computes the least-squares regression line y = m*x + b for the given points.
//
//   m = (n * sum(x*y) - sum(x) * sum(y)) / (n * sum(x*x) - sum(x)^2)
//   b = (sum(y) - m * sum(x)) / n
//
// Parameters:
//   points     - array holding at least num_points data points (not modified)
//   num_points - number of points to use
//   m          - output: slope of the fitted line
//   b          - output: y-intercept of the fitted line
//
// If no line can be fitted -- num_points <= 0, or the denominator of the
// slope formula is exactly zero (e.g. a single point, or every x identical) --
// both *m and *b are set to NaN instead of performing a division by zero.
void computeLinearRegression(const DataPoint points[], int num_points, double *m, double *b) {
    if (num_points <= 0) {
        *m = NAN;
        *b = NAN;
        return;
    }

    double sum_x = 0, sum_y = 0, sum_xy = 0, sum_x_squared = 0;

    for (int i = 0; i < num_points; i++) {
        sum_x += points[i].x;
        sum_y += points[i].y;
        sum_xy += points[i].x * points[i].y;
        sum_x_squared += points[i].x * points[i].x;
    }

    // Work in double so the products below are not done in integer arithmetic.
    const double n = num_points;

    const double denominator = n * sum_x_squared - sum_x * sum_x;
    if (denominator == 0.0) {
        // The x values have no spread, so the slope is undefined.
        *m = NAN;
        *b = NAN;
        return;
    }

    // Compute slope (m)
    *m = (n * sum_xy - sum_x * sum_y) / denominator;

    // Compute y-intercept (b)
    *b = (sum_y - (*m) * sum_x) / n;
}

// Reads (x, y) pairs from standard input, fits a line to them with
// computeLinearRegression(), and prints the slope, intercept and equation.
//
// Reading stops at the sentinel pair "-1 -1", after MAX_POINTS pairs, or when
// scanf() cannot convert two numbers (end of input or non-numeric text).
//
// Returns 0 on success, 1 if fewer than two points were entered.
int main(void) {
    DataPoint points[MAX_POINTS];
    int num_points = 0;

    printf("Enter x and y coordinates (enter -1 -1 to finish):\n");

    while (num_points < MAX_POINTS) {
        double x, y;

        // scanf() returns the number of successful conversions. Anything
        // other than 2 means x and/or y were not written, so stop reading
        // instead of storing indeterminate values.
        if (scanf("%lf %lf", &x, &y) != 2) {
            break;
        }

        // (-1, -1) is reserved as the end-of-input marker and is therefore
        // never stored as a data point.
        if (x == -1 && y == -1) {
            break;
        }

        points[num_points].x = x;
        points[num_points].y = y;
        num_points++;
    }

    // With zero or one point the slope formula divides by zero, so there is
    // nothing meaningful to report.
    if (num_points < 2) {
        fprintf(stderr, "At least two data points are required for linear regression.\n");
        return 1;
    }

    double slope, intercept;
    computeLinearRegression(points, num_points, &slope, &intercept);

    printf("\nLinear Regression Results:\n");
    printf("Number of points: %d\n", num_points);
    printf("Slope (m): %.4f\n", slope);
    printf("Y-Intercept (b): %.4f\n", intercept);
    printf("Equation: y = %.4fx + %.4f\n", slope, intercept);

    return 0;
}

数学ライブラリを使用してプログラムをコンパイルします。

gcc -o linear_regression linear_regression.c -lm

サンプルデータポイントを使用してプログラムを実行します。

./linear_regression

出力例:

Enter x and y coordinates (enter -1 -1 to finish):
1 2
2 4
3 5
4 4
5 5
-1 -1

Linear Regression Results:
Number of points: 5
Slope (m): 0.6000
Y-Intercept (b): 2.2000
Equation: y = 0.6000x + 2.2000

線形回帰計算に関する要点:

傾きと切片を計算するために最小二乗法を使用します。
傾きの公式は: m = (n * Σ(xy) - Σx * Σy) / (n * Σ(x²) - (Σx)²)
y 切片の公式は: b = (Σy - m * Σx) / n
関数 computeLinearRegression() がこれらのパラメータを計算します。
メイン関数が回帰方程式を出力します。

y = mx + b の出力

このステップでは、線形回帰方程式を出力し、計算された傾きと切片を使用して y の値を予測する方法を学びます。

予測機能を追加するために linear_regression.c ファイルを更新します。

cd ~/project
nano linear_regression.c

前のコードを以下の実装に置き換えます。

#include <stdio.h>
#include <math.h>
#define MAX_POINTS 100

// One (x, y) data point.
typedef struct {
    double x;
    double y;
} DataPoint;

// Computes the least-squares regression line y = m*x + b for the given points.
//
//   m = (n * sum(x*y) - sum(x) * sum(y)) / (n * sum(x*x) - sum(x)^2)
//   b = (sum(y) - m * sum(x)) / n
//
// Parameters:
//   points     - array holding at least num_points data points (not modified)
//   num_points - number of points to use
//   m          - output: slope of the fitted line
//   b          - output: y-intercept of the fitted line
//
// If no line can be fitted -- num_points <= 0, or the denominator of the
// slope formula is exactly zero (e.g. a single point, or every x identical) --
// both *m and *b are set to NaN instead of performing a division by zero.
void computeLinearRegression(const DataPoint points[], int num_points, double *m, double *b) {
    if (num_points <= 0) {
        *m = NAN;
        *b = NAN;
        return;
    }

    double sum_x = 0, sum_y = 0, sum_xy = 0, sum_x_squared = 0;

    for (int i = 0; i < num_points; i++) {
        sum_x += points[i].x;
        sum_y += points[i].y;
        sum_xy += points[i].x * points[i].y;
        sum_x_squared += points[i].x * points[i].x;
    }

    // Work in double so the products below are not done in integer arithmetic.
    const double n = num_points;

    const double denominator = n * sum_x_squared - sum_x * sum_x;
    if (denominator == 0.0) {
        // The x values have no spread, so the slope is undefined.
        *m = NAN;
        *b = NAN;
        return;
    }

    *m = (n * sum_xy - sum_x * sum_y) / denominator;
    *b = (sum_y - (*m) * sum_x) / n;
}

// Evaluates the line y = m*x + b at the given x.
//
// Parameters:
//   m - slope of the line
//   b - y-intercept of the line
//   x - input value at which to evaluate the line
//
// Returns the corresponding y value.
double predictY(double m, double b, double x) {
    return b + m * x;
}

// Reads (x, y) pairs from standard input, fits a line to them with
// computeLinearRegression(), prints the equation, and then prints the
// predicted y for a fixed set of sample x values using predictY().
//
// Reading stops at the sentinel pair "-1 -1", after MAX_POINTS pairs, or when
// scanf() cannot convert two numbers (end of input or non-numeric text).
//
// Returns 0 on success, 1 if fewer than two points were entered.
int main(void) {
    DataPoint points[MAX_POINTS];
    int num_points = 0;

    printf("Enter x and y coordinates (enter -1 -1 to finish):\n");

    while (num_points < MAX_POINTS) {
        double x, y;

        // scanf() returns the number of successful conversions. Anything
        // other than 2 means x and/or y were not written, so stop reading
        // instead of storing indeterminate values.
        if (scanf("%lf %lf", &x, &y) != 2) {
            break;
        }

        // (-1, -1) is reserved as the end-of-input marker and is therefore
        // never stored as a data point.
        if (x == -1 && y == -1) {
            break;
        }

        points[num_points].x = x;
        points[num_points].y = y;
        num_points++;
    }

    // With zero or one point the slope formula divides by zero, so there is
    // nothing meaningful to report.
    if (num_points < 2) {
        fprintf(stderr, "At least two data points are required for linear regression.\n");
        return 1;
    }

    double slope, intercept;
    computeLinearRegression(points, num_points, &slope, &intercept);

    printf("\nLinear Regression Equation:\n");
    printf("y = %.4fx + %.4f\n", slope, intercept);

    // Print prediction for sample x values
    printf("\nPredicted y values:\n");
    const double test_x_values[] = {0, 2.5, 6, 10};
    // Derive the count from the array so the loop stays correct if values are
    // added to or removed from the list above.
    const size_t num_test_values = sizeof test_x_values / sizeof test_x_values[0];
    for (size_t i = 0; i < num_test_values; i++) {
        double predicted_y = predictY(slope, intercept, test_x_values[i]);
        printf("When x = %.2f, y = %.4f\n", test_x_values[i], predicted_y);
    }

    return 0;
}

プログラムをコンパイルします。

gcc -o linear_regression linear_regression.c -lm

サンプルデータポイントを使用してプログラムを実行します。

./linear_regression

出力例:

Enter x and y coordinates (enter -1 -1 to finish):
1 2
2 4
3 5
4 4
5 5
-1 -1

Linear Regression Equation:
y = 0.6000x + 2.2000

Predicted y values:
When x = 0.00, y = 2.2000
When x = 2.50, y = 3.7000
When x = 6.00, y = 5.8000
When x = 10.00, y = 8.2000

回帰方程式の出力に関する要点:

任意の x に対する y を計算する predictY() 関数を追加しました。
メイン関数が完全な方程式 y = mx + b を出力します。
異なる x 入力に対する y の値を表示することで予測をデモンストレーションします。
出力された方程式と予測値により、線形回帰モデルの挙動を数値で確認できます。

まとめ

この実験では、C 言語で線形回帰分析のための (x,y) データポイントを読み取る方法を学びました。複数のデータポイントの入力を可能にし、それらをさらなる計算のために保存するプログラムを作成しました。また、入力されたデータポイントを出力して検証する方法も学びました。

次に、線形回帰直線の傾き (m) と切片 (b) を計算する方法を学び、最後に、y = mx + b の形式で方程式を出力し、y の値を予測する方法を学びました。