These data are taken from the 1980 Census. They were provided by Professor William Evans of the University of Maryland and were used in his paper with Joshua Angrist, "Children and Their Parents' Labor Supply: Evidence from Exogenous Variation in Family Size", American Economic Review, June 1998, Vol. 88, No. 3, 450-477. The file Fertility.dta (in STATA format) contains data on 254,654 women between the ages of 21 and 35. The data in Fertility are a subset of the data used in the Angrist-Evans paper. (The file Fertility_Small contains data on 30,000 randomly selected women from the Fertility data set. This smaller dataset is provided for students with memory limitations on their computer software.)
Artikkeli löytyy osoitteesta http://siteresources.worldbank.org/INTPUBSERV/Resources/Angrist_and_Evans.pdf
(a) OLS-regressio, kun y-muuttuja on weeksworked ja x-muuttuja on morekids
# (a) OLS regression of weeks worked (y) on the "more kids" indicator (x).
#
# The first line of the CSV is skipped (skip = 1) and columns are addressed
# by position: column 1 is morekids, column 9 is weeksworked.
data_url <- "http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.csv"
fertility <- read.table(data_url, skip = 1, sep = ",")

y <- fertility[, 9]  # weeksworked
x <- fertility[, 1]  # morekids

reg.model <- lm(y ~ x)
summary(reg.model)
# Recorded output:
#
# Call:
# lm(formula = y ~ x)
#
# Residuals:
#    Min     1Q Median     3Q    Max
# -21.48 -21.48 -13.48  26.52  36.53
#
# Coefficients:
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept)  21.4782     0.1592   135.0   <2e-16 ***
# x            -6.0082     0.2590   -23.2   <2e-16 ***
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 21.75 on 29998 degrees of freedom
# Multiple R-squared:  0.01762, Adjusted R-squared:  0.01759
# F-statistic: 538.2 on 1 and 29998 DF,  p-value: < 2.2e-16
(b) IV-regressio, kun y-muuttuja on weeksworked, x-muuttuja on morekids ja instrumenttimuuttuja on samesex
# (b) IV regression of weeks worked (y) on morekids (x), using samesex (z)
# as the instrument. The estimate is computed twice: with the closed-form IV
# estimator and with explicit two-stage least squares (2SLS).
#
# The first line of the CSV is skipped (skip = 1) and columns are addressed
# by position.
data_url <- "http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.csv"
fertility <- read.table(data_url, skip = 1, sep = ",")

y <- fertility[, 9]  # weeksworked
x <- fertility[, 1]  # morekids
z <- fertility[, 4]  # samesex

cor(x, z)
# [1] 0.06891525

# Instrument matrix Z and regressor matrix X, each with an intercept column.
# deparse.level = 0 keeps the matrices free of column names, so the printed
# coefficient vectors are labelled [1,], [2,], ... as in the recorded output.
n <- length(x)
K <- 2
Z <- cbind(1, z, deparse.level = 0)
X <- cbind(1, x, deparse.level = 0)

# IV estimator: b = (Z'X)^{-1} Z'y.
# The linear system is solved directly instead of forming the inverse first.
ztx <- t(Z) %*% X
b <- solve(ztx, t(Z) %*% y)
b
#           [,1]
# [1,] 21.487634
# [2,] -6.033194

# 2SLS estimator (reuses Z and X from above).

# Step 1: regress x on the instruments, b1 = (Z'Z)^{-1} Z'x.
ZtZ <- t(Z) %*% Z
b1 <- solve(ZtZ, t(Z) %*% x)
b1
# [1,] 0.34397853
# [2,] 0.06681975

# Step 2: replace x by its first-stage fitted values and solve
# (Xhat'X) b = Xhat'y.
xhat <- Z %*% b1
Xhat <- cbind(1, xhat, deparse.level = 0)
XtX <- t(Xhat) %*% X
b <- solve(XtX, t(Xhat) %*% y)
b
# [1,] 21.487634
# [2,] -6.033194

# Scatter plot with the fitted IV line drawn from x = 0 to x = 1.
plot(x, y)
segments(0, b[1], 1, b[1] + b[2])

# Histograms of y: all observations, then split by morekids.
hist(y)
y0 <- y[x == 0]
hist(y0)
dev.new()  # portable replacement for x11(): open a second graphics device
y1 <- y[x == 1]
hist(y1)

sum(x == 0)
# [1] 18672
(c) IV-regressio, kun y-muuttuja on weeksworked, x-muuttuja on morekids, instrumenttimuuttuja on samesex, ja agemom, black, hispan ja othrace ovat eksogeenisia selittäviä muuttujia.
# (c) IV regression of weeks worked (y) on morekids (x), instrumented by
# samesex (z), with agemom, black, hispan and othrace as exogenous
# regressors. The exogenous regressors appear in both X and Z (they act as
# their own instruments).
#
# The first line of the CSV is skipped (skip = 1) and columns are addressed
# by position.
data_url <- "http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.csv"
fertility <- read.table(data_url, skip = 1, sep = ",")

y <- fertility[, 9]   # weeksworked
x <- fertility[, 1]   # morekids
z <- fertility[, 4]   # samesex
w1 <- fertility[, 5]  # agemom
w2 <- fertility[, 6]  # black
w3 <- fertility[, 7]  # hispan
w4 <- fertility[, 8]  # othrace

# Instrument matrix Z and regressor matrix X; they differ only in the second
# column (z versus x). deparse.level = 0 keeps the matrices free of column
# names, so printed coefficients are labelled [1,], [2,], ...
n <- length(x)
K <- 6
Z <- cbind(1, z, w1, w2, w3, w4, deparse.level = 0)
X <- cbind(1, x, w1, w2, w3, w4, deparse.level = 0)

# IV estimator: b = (Z'X)^{-1} Z'y.
# The linear system is solved directly instead of forming the inverse first.
ztx <- t(Z) %*% X
b <- solve(ztx, t(Z) %*% y)
b
#            [,1]
# [1,] -4.3703424
# [2,] -5.7807463
# [3,]  0.8234973
# [4,] 11.4262797
# [5,] -0.4117677
# [6,]  3.3077888

# 2SLS estimator (reuses Z and X from above).

# Step 1: regress x on all instruments, b1 = (Z'Z)^{-1} Z'x.
ZtZ <- t(Z) %*% Z
b1 <- solve(ZtZ, t(Z) %*% x)
b1
#             [,1]
# [1,] -0.17301671
# [2,]  0.06786242
# [3,]  0.01643797
# [4,]  0.09626415
# [5,]  0.14843272
# [6,]  0.02352631

# Step 2: replace x by its first-stage fitted values and solve
# (Xhat'X) b = Xhat'y.
xhat <- Z %*% b1
Xhat <- cbind(1, xhat, w1, w2, w3, w4, deparse.level = 0)
XtX <- t(Xhat) %*% X
b <- solve(XtX, t(Xhat) %*% y)
b
#            [,1]
# [1,] -4.3703424
# [2,] -5.7807463
# [3,]  0.8234973
# [4,] 11.4262797
# [5,] -0.4117677
# [6,]  3.3077888
Kokeillaan sem-pakettia
# Same estimates with sem::tsls(). Uses y, x, z and w1..w4 as defined in the
# part (c) script.
library(sem)

# Small model: one endogenous regressor, one instrument.
model <- tsls(y ~ x, instruments = ~ z)
model
# Model Formula: y ~ x
#
# Instruments: ~z
#
# Coefficients:
# (Intercept)           x
#   21.487634   -6.033194

# Diagnostics
summary(model)
#  2SLS Estimates
#
# Model Formula: y ~ x
#
# Instruments: ~z
#
# Residuals:
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#   -21.5   -21.5   -13.5     0.0    26.5    36.5
#
#             Estimate Std. Error t value Pr(>|t|)
# (Intercept)   21.488      1.425  15.083   0.0000
# x             -6.033      3.758  -1.605   0.1084
#
# Residual standard error: 21.7472 on 29998 degrees of freedom

# All variables: the exogenous regressors are listed among the instruments.
model <- tsls(y ~ x + w1 + w2 + w3 + w4, instruments = ~ z + w1 + w2 + w3 + w4)
model
# Model Formula: y ~ x + w1 + w2 + w3 + w4
#
# Instruments: ~z + w1 + w2 + w3 + w4
#
# Coefficients:
# (Intercept)           x          w1          w2          w3          w4
#  -4.3703424  -5.7807463   0.8234973  11.4262797  -0.4117677   3.3077888

summary(model)
#  2SLS Estimates
#
# Model Formula: y ~ x + w1 + w2 + w3 + w4
#
# Instruments: ~z + w1 + w2 + w3 + w4
#
# Residuals:
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#   -35.9   -17.9   -10.7     0.0    23.2    44.0
#
#             Estimate Std. Error t value  Pr(>|t|)
# (Intercept)  -4.3703     1.2279 -3.5591 3.728e-04
# x            -5.7807     3.6450 -1.5860 1.128e-01
# w1            0.8235     0.0700 11.7651 0.000e+00
# w2           11.4263     0.6552 17.4387 0.000e+00
# w3           -0.4118     0.7480 -0.5505 5.820e-01
# w4            3.3078     0.5959  5.5511 2.862e-08
#
# Residual standard error: 21.4201 on 29994 degrees of freedom

# Invalid input: the exogenous regressors w1..w4 are missing from the
# instrument list, so there are fewer instruments than regressors and the
# call fails. try() reports the error without aborting a sourced script.
try(tsls(y ~ x + w1 + w2 + w3 + w4, instruments = ~ z))
# Error in chol.default(XtZ %*% invZtZ %*% t(XtZ)) :
#   the leading minor of order 3 is not positive definite
Kokeillaan SAS:ia
http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_syslin_sect007.htm
/* ------------------------------------------------------------------ */
/* Run 1: 2SLS of weeksworked on morekids, instrument samesex.        */
/* The first line of the text file is skipped (firstobs=2).           */
/* ------------------------------------------------------------------ */
FILENAME myurl URL 'http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.txt';

DATA Fertility;
  INFILE myurl firstobs=2;
  INPUT ind $ morekids boy1st boy2nd samesex agemom black hispan othrace weeksworked;
RUN;

proc syslin data=Fertility 2sls;
  endogenous morekids;
  instruments samesex;
  model weeksworked = morekids;
run;

/* Recorded output:

The SAS System            12:07 Wednesday, February 12, 2014   1

The SYSLIN Procedure
Two-Stage Least Squares Estimation

Model                 weeksworked
Dependent Variable    weeksworked

Analysis of Variance
                              Sum of        Mean
Source               DF      Squares      Square   F Value   Pr > F
Model                 1     1218.844    1218.844      2.58   0.1084
Error             29998     14187255    472.9400
Corrected Total   29999     14441766

Root MSE           21.74718   R-Square   0.00009
Dependent Mean     19.20950   Adj R-Sq   0.00005
Coeff Var         113.21057

Parameter Estimates
                    Parameter    Standard
Variable      DF     Estimate       Error   t Value   Pr > |t|
Intercept      1     21.48763    1.424628     15.08     <.0001
morekids       1     -6.03319    3.758169     -1.61     0.1084
*/

/* ------------------------------------------------------------------ */
/* Run 2: exogenous regressors added to the model and also listed     */
/* in the INSTRUMENTS statement.                                      */
/* ------------------------------------------------------------------ */
FILENAME myurl URL 'http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.txt';

DATA Fertility;
  INFILE myurl firstobs=2;
  INPUT ind $ morekids boy1st boy2nd samesex agemom black hispan othrace weeksworked;
RUN;

proc syslin data=Fertility 2sls;
  endogenous morekids;
  instruments samesex agemom black hispan othrace;
  model weeksworked = morekids agemom black hispan othrace;
run;

/* Recorded output:

The SAS System            12:10 Wednesday, February 12, 2014   2

The SYSLIN Procedure
Two-Stage Least Squares Estimation

Model                 weeksworked
Dependent Variable    weeksworked

Analysis of Variance
                              Sum of        Mean
Source               DF      Squares      Square   F Value   Pr > F
Model                 5     361252.8    72250.56    157.47   <.0001
Error             29994     13761883    458.8212
Corrected Total   29999     14441766

Root MSE           21.42011   R-Square   0.02558
Dependent Mean     19.20950   Adj R-Sq   0.02542
Coeff Var         111.50791

Parameter Estimates
                    Parameter    Standard
Variable      DF     Estimate       Error   t Value   Pr > |t|
Intercept      1     -4.37034    1.227949     -3.56     0.0004
morekids       1     -5.78075    3.644973     -1.59     0.1128
agemom         1     0.823497    0.069995     11.77     <.0001
black          1     11.42628    0.655227     17.44     <.0001
hispan         1     -0.41177    0.748047     -0.55     0.5820
othrace        1     3.307789    0.595876      5.55     <.0001
*/

/* ------------------------------------------------------------------ */
/* Run 3 (note!): same model as run 2, but only samesex is listed in  */
/* the INSTRUMENTS statement. The procedure still runs, and the       */
/* estimates differ from run 2.                                       */
/* ------------------------------------------------------------------ */
FILENAME myurl URL 'http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.txt';

DATA Fertility;
  INFILE myurl firstobs=2;
  INPUT ind $ morekids boy1st boy2nd samesex agemom black hispan othrace weeksworked;
RUN;

proc syslin data=Fertility 2sls;
  endogenous morekids;
  instruments samesex;
  model weeksworked = morekids agemom black hispan othrace;
run;

/* Recorded output:

The SAS System            12:10 Wednesday, February 12, 2014   3

The SYSLIN Procedure
Two-Stage Least Squares Estimation

Model                 weeksworked
Dependent Variable    weeksworked

Analysis of Variance
                              Sum of        Mean
Source               DF      Squares      Square   F Value   Pr > F
Model                 5     361252.8    72250.56    157.43   <.0001
Error             29994     13765597    458.9450
Corrected Total   29999     14441766

Root MSE           21.42300   R-Square   0.02557
Dependent Mean     19.20950   Adj R-Sq   0.02541
Coeff Var         111.52296

Parameter Estimates
                    Parameter    Standard
Variable      DF     Estimate       Error   t Value   Pr > |t|
Intercept      1     -1.35070    1.799170     -0.75     0.4528
morekids       1     -5.87095    3.702350     -1.59     0.1128
agemom         1     0.728474    0.036624     19.89     <.0001
black          1     10.86980    0.552211     19.68     <.0001
hispan         1     -1.26982    0.520349     -2.44     0.0147
othrace        1     3.171789    0.589436      5.38     <.0001
*/