利用 R 进行多元回归估计
楚新元 / 2021-08-23
案例来源于古扎拉蒂《经济计量学精要》(第四版)第二章例 2-5 古董钟与拍卖价格。之所以选择此案例是因为此案例只有 3 个变量,32 个样本,便于手工验证。
读取数据
library(readxl)
library(dplyr)
library(kableExtra)
data = read_excel("data/Table2_14.xls", skip = 5)
data %>%
kable(
caption = "古董钟与拍卖价格",
format = "html"
) %>%
kable_styling(
bootstrap_options = "striped",
fixed_thead = TRUE,
font_size = 14,
) %>%
column_spec(1, width = "80px") %>%
column_spec(2:4, width = "120px")
Observation | Price | Age | Number of Bidders |
---|---|---|---|
1 | 1235 | 127 | 13 |
2 | 1080 | 115 | 12 |
3 | 845 | 127 | 7 |
4 | 1552 | 150 | 9 |
5 | 1047 | 156 | 6 |
6 | 1979 | 182 | 11 |
7 | 1822 | 156 | 12 |
8 | 1253 | 132 | 10 |
9 | 1297 | 137 | 9 |
10 | 946 | 113 | 9 |
11 | 1713 | 137 | 15 |
12 | 1024 | 117 | 11 |
13 | 2131 | 170 | 14 |
14 | 1550 | 182 | 8 |
15 | 1884 | 162 | 11 |
16 | 2041 | 184 | 10 |
17 | 854 | 143 | 6 |
18 | 1483 | 159 | 9 |
19 | 1055 | 108 | 14 |
20 | 1545 | 175 | 8 |
21 | 729 | 108 | 6 |
22 | 1792 | 179 | 9 |
23 | 1175 | 111 | 15 |
24 | 1593 | 187 | 8 |
25 | 1147 | 137 | 8 |
26 | 1092 | 153 | 6 |
27 | 1152 | 117 | 13 |
28 | 1336 | 126 | 10 |
29 | 785 | 111 | 7 |
30 | 744 | 115 | 7 |
31 | 1356 | 194 | 5 |
32 | 1262 | 168 | 7 |
变量含义
- Price:古董钟中标的价格
- Age:钟表的年代
- Number of Bidders:投标人的个数
利用 R 自带函数进行参数估计
fit = lm(Price ~ Age + `Number of Bidders`, data = data)
summary(fit)
##
## Call:
## lm(formula = Price ~ Age + `Number of Bidders`, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -208.60 -119.04 15.73 101.84 212.54
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1336.0493 175.2725 -7.623 2.10e-08 ***
## Age 12.7414 0.9124 13.965 2.09e-14 ***
## `Number of Bidders` 85.7641 8.8020 9.744 1.19e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 134.6 on 29 degrees of freedom
## Multiple R-squared: 0.8906, Adjusted R-squared: 0.8831
## F-statistic: 118.1 on 2 and 29 DF, p-value: 1.161e-14
手动计算参数值和各个统计量(含截距项)
生成矩阵 \(X\)
data %>%
mutate(X1 = 1) %>%
select(X1, Age, `Number of Bidders`) %>%
as.matrix() -> X
print(X)
## X1 Age Number of Bidders
## [1,] 1 127 13
## [2,] 1 115 12
## [3,] 1 127 7
## [4,] 1 150 9
## [5,] 1 156 6
## [6,] 1 182 11
## [7,] 1 156 12
## [8,] 1 132 10
## [9,] 1 137 9
## [10,] 1 113 9
## [11,] 1 137 15
## [12,] 1 117 11
## [13,] 1 170 14
## [14,] 1 182 8
## [15,] 1 162 11
## [16,] 1 184 10
## [17,] 1 143 6
## [18,] 1 159 9
## [19,] 1 108 14
## [20,] 1 175 8
## [21,] 1 108 6
## [22,] 1 179 9
## [23,] 1 111 15
## [24,] 1 187 8
## [25,] 1 137 8
## [26,] 1 153 6
## [27,] 1 117 13
## [28,] 1 126 10
## [29,] 1 111 7
## [30,] 1 115 7
## [31,] 1 194 5
## [32,] 1 168 7
生成矩阵 \(Y\)
Y = as.matrix(data$Price)
print(Y)
## [,1]
## [1,] 1235
## [2,] 1080
## [3,] 845
## [4,] 1552
## [5,] 1047
## [6,] 1979
## [7,] 1822
## [8,] 1253
## [9,] 1297
## [10,] 946
## [11,] 1713
## [12,] 1024
## [13,] 2131
## [14,] 1550
## [15,] 1884
## [16,] 2041
## [17,] 854
## [18,] 1483
## [19,] 1055
## [20,] 1545
## [21,] 729
## [22,] 1792
## [23,] 1175
## [24,] 1593
## [25,] 1147
## [26,] 1092
## [27,] 1152
## [28,] 1336
## [29,] 785
## [30,] 744
## [31,] 1356
## [32,] 1262
计算样本容量 \(n\)
和自由度 \(df\)
n = nrow(data)
k = ncol(X)
df = n - k
print(c(n = n, k = k, df = df))
## n k df
## 32 3 29
估计参数 \(\beta\)
Beta_hat = solve(t(X) %*% X) %*% t(X) %*% Y
colnames(Beta_hat) = "Beta_hat"
print(Beta_hat)
## Beta_hat
## X1 -1336.04929
## Age 12.74138
## Number of Bidders 85.76407
计算回归残差 \(e\)
residuals = Y - (X %*% Beta_hat)
colnames(residuals) = "residuals"
print(residuals)
## residuals
## [1,] -162.03929
## [2,] -78.37862
## [3,] -37.45489
## [4,] 204.96516
## [5,] -119.19094
## [6,] 52.71275
## [7,] 141.22465
## [8,] 49.54599
## [9,] 115.60314
## [10,] 70.39635
## [11,] 17.01874
## [12,] -74.09732
## [13,] 100.31715
## [14,] -118.99505
## [15,] 212.54042
## [16,] 174.99405
## [17,] -146.55296
## [18,] 21.29270
## [19,] -185.71707
## [20,] -34.80536
## [21,] 174.39547
## [22,] 75.46503
## [23,] -189.70529
## [24,] -139.70197
## [25,] 51.36721
## [26,] -35.96679
## [27,] -117.62546
## [28,] 208.99429
## [29,] 106.40725
## [30,] 14.44171
## [31,] -208.59945
## [32,] -142.85161
计算回归标准误 \(\hat{\sigma}\)
Sigma_hat = sqrt(sum(residuals^2) / df)
c(Sigma_hat = Sigma_hat)
## Sigma_hat
## 134.6083
计算 \(\beta\)
估计量的方差与标准误
Beta_hat_var = solve(t(X) %*% X) * (Sigma_hat)^2
Beta1_hat_se = sqrt(Beta_hat_var[1, 1])
Beta2_hat_se = sqrt(Beta_hat_var[2, 2])
Beta3_hat_se = sqrt(Beta_hat_var[3, 3])
c(
Beta1_hat_se = Beta1_hat_se,
Beta2_hat_se = Beta2_hat_se,
Beta3_hat_se = Beta3_hat_se
)
## Beta1_hat_se Beta2_hat_se Beta3_hat_se
## 175.2724965 0.9123559 8.8019949
计算 \(t\)
统计量
t_Beta1_hat = Beta_hat[1, 1] / Beta1_hat_se
t_Beta2_hat = Beta_hat[2, 1] / Beta2_hat_se
t_Beta3_hat = Beta_hat[3, 1] / Beta3_hat_se
c(
t_Beta1_hat = t_Beta1_hat,
t_Beta2_hat = t_Beta2_hat,
t_Beta3_hat = t_Beta3_hat
)
## t_Beta1_hat t_Beta2_hat t_Beta3_hat
## -7.622698 13.965366 9.743708
计算拟合优度 \(R^2\)
TSS = sum((Y - mean(Y))^2)
RSS = sum(residuals^2)
ESS = TSS - RSS
R_squared = 1 - RSS/TSS
c(R_squared = R_squared)
## R_squared
## 0.8906143
计算修正的拟合优度 \(Adjusted\ R^2\)
Adjusted_R_squared = 1 - (1 - R_squared) * (n - 1) / (n - k)
c(Adjusted_R_squared = Adjusted_R_squared)
## Adjusted_R_squared
## 0.8830705
计算 \(F\)
统计量
F_statistic = (R_squared / (k - 1)) /
((1 - R_squared) / (n - k))
c(F_statistic = F_statistic)
## F_statistic
## 118.0585