Tietokoneharjoitus 5

These data are taken from the 1980 Census. They were provided by Professor William Evans of the University of Maryland and were used in his paper with Joshua Angrist, "Children and Their Parents’ Labor Supply: Evidence from Exogenous Variation in Family Size", American Economic Review, June 1998, Vol. 88, No. 3, 450-477. The file Fertility.dta (in STATA format) contains data on 254,654 women between the ages of 21 and 35. The data in Fertility are a subset of the data used in the Angrist-Evans paper. (The file Fertility_Small contains data on 30,000 randomly selected women from the Fertility data set. This smaller dataset is provided for students with memory limitations on their computer software.)

Artikkeli loytyy osoitteesta http://siteresources.worldbank.org/INTPUBSERV/Resources/Angrist_and_Evans.pdf

(a) OLS-regressio, kun y-muuttuja on weeksworked ja x-muuttuja on morekids

# Read the Fertility sample from the course server; skip = 1 drops the first
# line of the file, so columns are addressed by position below.
file <- "http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.csv"
data <- read.table(file, skip = 1, sep = ",")

y <- data[[9]]   # weeksworked (dependent variable)
x <- data[[1]]   # morekids (regressor)

# OLS regression of weeks worked on the morekids indicator
reg.model <- lm(y ~ x)
summary(reg.model)


Call:
lm(formula = y ~ x)

Residuals:
   Min     1Q Median     3Q    Max 
-21.48 -21.48 -13.48  26.52  36.53 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  21.4782     0.1592   135.0   <2e-16 ***
x            -6.0082     0.2590   -23.2   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 

Residual standard error: 21.75 on 29998 degrees of freedom
Multiple R-squared: 0.01762,	Adjusted R-squared: 0.01759 
F-statistic: 538.2 on 1 and 29998 DF,  p-value: < 2.2e-16 

(b) IV-regressio, kun y-muuttuja on weeksworked, x-muuttuja on morekids ja instrumenttimuuttuja on samesex

# Read the Fertility sample again; skip = 1 drops the first line of the file.
file <- "http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.csv"
data <- read.table(file, skip = 1, sep = ",")

y <- data[[9]]   # weeksworked (dependent variable)
x <- data[[1]]   # morekids (endogenous regressor)
z <- data[[4]]   # samesex (instrument)

# Sample correlation between the regressor and its instrument
cor(x, z)

[1] 0.06891525

# IV estimator for the just-identified case: b = (Z'X)^{-1} Z'y

n <- length(x)
K <- 2                  # number of columns: intercept + one variable

# Z: instrument matrix (intercept, samesex)
Z <- matrix(0, n, K)
Z[, 1] <- 1
Z[, 2] <- z

# X: regressor matrix (intercept, morekids)
X <- matrix(0, n, K)
X[, 1] <- 1
X[, 2] <- x

ztx <- t(Z) %*% X
# Solving against the K x K identity yields the inverse of Z'X; use K rather
# than a hard-coded 2 so the dimension always follows the matrices above.
invztx <- solve(ztx, diag(K))
b <- invztx %*% t(Z) %*% y
b

          [,1]
[1,] 21.487634
[2,] -6.033194

# 2SLS estimator

n <- length(x)
K <- 2
# Column-major fill: first column is the intercept, second the variable.
Z <- matrix(c(rep(1, n), z), nrow = n, ncol = K)   # instruments: 1, samesex
X <- matrix(c(rep(1, n), x), nrow = n, ncol = K)   # regressors:  1, morekids

# Step 1: least-squares regression of x on the instruments,
# b1 = (Z'Z)^{-1} Z'x
ZtZ <- t(Z) %*% Z
invZtZ <- solve(ZtZ, diag(K))
b1 <- invZtZ %*% t(Z) %*% x
b1

[1,] 0.34397853
[2,] 0.06681975

# Step 2: replace x by its first-stage fitted values and estimate again
n <- length(y)
K <- 2
xhat <- Z %*% b1                                         # fitted values of x
Xhat <- matrix(c(rep(1, n), xhat), nrow = n, ncol = K)   # columns: 1, xhat

# Note: despite its name, XtX holds Xhat'X. Because Xhat is the least-squares
# projection of X on the columns of Z, Xhat'X equals Xhat'Xhat.
XtX <- t(Xhat) %*% X
invXtX <- solve(XtX, diag(K))
b <- invXtX %*% t(Xhat) %*% y
b

[1,] 21.487634
[2,] -6.033194

# Scatter plot of y against x with the fitted line drawn from x = 0 to x = 1
# using the coefficient vector b (intercept b[1], slope b[2])

plot(x, y)
segments(0, b[1], 1, b[1] + b[2])

# Histograms of y: whole sample, then the x == 0 and x == 1 subsamples

hist(y)
y0 <- y[x == 0]
hist(y0)
# dev.new() opens the platform's default graphics device, so the x == 1
# histogram gets its own window on any system (x11() is platform-specific).
dev.new()
y1 <- y[x == 1]
hist(y1)

# Number of observations with x == 0
sum(x == 0)
#[1] 18672

(c) IV-regressio, kun y-muuttuja on weeksworked, x-muuttuja on morekids, instrumenttimuuttuja on samesex seka agemom, black, hispan ja othrace ovat eksogeenisia selittavia muuttujia.

# Read the Fertility sample again; skip = 1 drops the first line of the file.
file <- "http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.csv"
data <- read.table(file, skip = 1, sep = ",")

y  <- data[[9]]   # weeksworked (dependent variable)
x  <- data[[1]]   # morekids (endogenous regressor)
z  <- data[[4]]   # samesex (instrument)
w1 <- data[[5]]   # agemom  (exogenous regressor)
w2 <- data[[6]]   # black   (exogenous regressor)
w3 <- data[[7]]   # hispan  (exogenous regressor)
w4 <- data[[8]]   # othrace (exogenous regressor)

# IV estimator with exogenous regressors: b = (Z'X)^{-1} Z'y

n <- length(x)
K <- 6

# Instrument matrix: intercept, samesex and the exogenous regressors
# (filled column by column).
Z <- matrix(c(rep(1, n), z, w1, w2, w3, w4), nrow = n, ncol = K)

# Regressor matrix: same as Z except that column 2 holds the endogenous
# regressor morekids instead of the instrument.
X <- matrix(c(rep(1, n), x, w1, w2, w3, w4), nrow = n, ncol = K)

ztx <- t(Z) %*% X
invztx <- solve(ztx, diag(K))   # inverse of Z'X
b <- invztx %*% t(Z) %*% y
b

           [,1]
[1,] -4.3703424
[2,] -5.7807463
[3,]  0.8234973
[4,] 11.4262797
[5,] -0.4117677
[6,]  3.3077888

# 2SLS
# Step 1: regress the endogenous x on all instruments

n <- length(x)
K <- 6
# Instrument matrix: intercept, samesex and the exogenous regressors
Z <- matrix(c(rep(1, n), z, w1, w2, w3, w4), nrow = n, ncol = K)

# First-stage coefficients b1 = (Z'Z)^{-1} Z'x
ZtZ <- t(Z) %*% Z
invZtZ <- solve(ZtZ, diag(K))
b1 <- invZtZ %*% t(Z) %*% x
b1

           [,1]
[1,] -0.17301671
[2,]  0.06786242
[3,]  0.01643797
[4,]  0.09626415
[5,]  0.14843272
[6,]  0.02352631

# Step 2: replace x by its first-stage fitted values and estimate again

n <- length(y)
K <- 6

xhat <- Z %*% b1   # fitted values of x from the first stage
# Same layout as X, with xhat in place of x in column 2
Xhat <- matrix(c(rep(1, n), xhat, w1, w2, w3, w4), nrow = n, ncol = K)

# Note: despite its name, XtX holds Xhat'X (Xhat against the original X).
XtX <- t(Xhat) %*% X
invXtX <- solve(XtX, diag(K))
b <- invXtX %*% t(Xhat) %*% y
b
          [,1]
[1,] -4.3703424
[2,] -5.7807463
[3,]  0.8234973
[4,] 11.4262797
[5,] -0.4117677
[6,]  3.3077888

Kokeillaan sem-pakettia


library(sem)

# Small model: y on x, with z as the instrument

model <- tsls(y ~ x, instruments = ~ z)
model

Model Formula: y ~ x

Instruments: ~z

Coefficients:
(Intercept)           x 
  21.487634   -6.033194 

# diagnostics: residual summary, standard errors, t values and p-values

summary(model)

 2SLS Estimates

Model Formula: y ~ x

Instruments: ~z

Residuals:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -21.5   -21.5   -13.5     0.0    26.5    36.5 

            Estimate Std. Error t value Pr(>|t|)
(Intercept)   21.488      1.425  15.083   0.0000
x             -6.033      3.758  -1.605   0.1084

Residual standard error: 21.7472 on 29998 degrees of freedom


# All variables: the exogenous regressors w1..w4 appear both in the model
# and in the instrument list, next to the instrument z.

model <- tsls(
  y ~ x + w1 + w2 + w3 + w4,
  instruments = ~ z + w1 + w2 + w3 + w4
)
model

Model Formula: y ~ x + w1 + w2 + w3 + w4

Instruments: ~z + w1 + w2 + w3 + w4

Coefficients:
(Intercept)           x          w1          w2          w3          w4 
 -4.3703424  -5.7807463   0.8234973  11.4262797  -0.4117677   3.3077888 

# residual summary, standard errors, t values and p-values of the full model
summary(model)

2SLS Estimates

Model Formula: y ~ x + w1 + w2 + w3 + w4

Instruments: ~z + w1 + w2 + w3 + w4

Residuals:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -35.9   -17.9   -10.7     0.0    23.2    44.0 

            Estimate Std. Error t value  Pr(>|t|)
(Intercept)  -4.3703     1.2279 -3.5591 3.728e-04
x            -5.7807     3.6450 -1.5860 1.128e-01
w1            0.8235     0.0700 11.7651 0.000e+00
w2           11.4263     0.6552 17.4387 0.000e+00
w3           -0.4118     0.7480 -0.5505 5.820e-01
w4            3.3078     0.5959  5.5511 2.862e-08

Residual standard error: 21.4201 on 29994 degrees of freedom

# Invalid input (deliberate): the exogenous regressors w1..w4 are in the model
# but missing from the instrument list, so there are fewer instruments than
# coefficients and tsls() stops with the error shown below.

tsls(y ~ x+w1+w2+w3+w4 , instruments = ~ z)

Error in chol.default(XtZ %*% invZtZ %*% t(XtZ)) : 
  the leading minor of order 3 is not positive definite


Kokeillaan SAS:ia

http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_syslin_sect007.htm


/* Read the data file over HTTP; firstobs=2 starts reading at the second line. */
filename myurl url 'http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.txt';

data Fertility;
  infile myurl firstobs=2;
  input ind $ morekids boy1st boy2nd samesex
        agemom black hispan othrace weeksworked;
run;

/* 2SLS: weeksworked on morekids, with samesex as the instrument. */
proc syslin data=Fertility 2sls;
  endogenous morekids;
  instruments samesex;
  model weeksworked = morekids;
run;

 The SAS System           12:07 Wednesday, February 12, 2014   1

                                            The SYSLIN Procedure
                                     Two-Stage Least Squares Estimation

                                      Model                 weeksworked
                                      Dependent Variable    weeksworked


                                            Analysis of Variance

                                                  Sum of        Mean
                   Source                 DF     Squares      Square    F Value    Pr > F

                   Model                   1    1218.844    1218.844       2.58    0.1084
                   Error               29998    14187255    472.9400
                   Corrected Total     29999    14441766


                           Root MSE            21.74718    R-Square       0.00009
                           Dependent Mean      19.20950    Adj R-Sq       0.00005
                           Coeff Var          113.21057


                                             Parameter Estimates

                                             Parameter    Standard
                    Variable           DF     Estimate       Error    t Value    Pr > |t|

                    Intercept           1     21.48763    1.424628      15.08      <.0001
                    morekids            1     -6.03319    3.758169      -1.61      0.1084



###############################################################

/* Read the data file over HTTP; firstobs=2 starts reading at the second line. */
filename myurl url 'http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.txt';

data Fertility;
  infile myurl firstobs=2;
  input ind $ morekids boy1st boy2nd samesex
        agemom black hispan othrace weeksworked;
run;

/* 2SLS with exogenous regressors: agemom, black, hispan and othrace are
   listed both in the model and in the instrument list, next to samesex. */
proc syslin data=Fertility 2sls;
  endogenous morekids;
  instruments samesex agemom black hispan othrace;
  model weeksworked = morekids agemom black hispan othrace;
run;


The SAS System           12:10 Wednesday, February 12, 2014   2

                                            The SYSLIN Procedure
                                     Two-Stage Least Squares Estimation

                                      Model                 weeksworked
                                      Dependent Variable    weeksworked


                                            Analysis of Variance

                                                  Sum of        Mean
                   Source                 DF     Squares      Square    F Value    Pr > F

                   Model                   5    361252.8    72250.56     157.47    <.0001
                   Error               29994    13761883    458.8212
                   Corrected Total     29999    14441766


                           Root MSE            21.42011    R-Square       0.02558
                           Dependent Mean      19.20950    Adj R-Sq       0.02542
                           Coeff Var          111.50791


                                             Parameter Estimates

                                             Parameter    Standard
                    Variable           DF     Estimate       Error    t Value    Pr > |t|

                    Intercept           1     -4.37034    1.227949      -3.56      0.0004
                    morekids            1     -5.78075    3.644973      -1.59      0.1128
                    agemom              1     0.823497    0.069995      11.77      <.0001
                    black               1     11.42628    0.655227      17.44      <.0001
                    hispan              1     -0.41177    0.748047      -0.55      0.5820
                    othrace             1     3.307789    0.595876       5.55      <.0001



###############################################################
Huom!

/* Read the data file over HTTP; firstobs=2 starts reading at the second line. */
filename myurl url 'http://cc.oulu.fi/~jklemela/econometrics/Fertility_small.txt';

data Fertility;
  infile myurl firstobs=2;
  input ind $ morekids boy1st boy2nd samesex
        agemom black hispan othrace weeksworked;
run;

/* Note: only samesex is listed in INSTRUMENTS here, although the model also
   contains agemom, black, hispan and othrace. The procedure still runs;
   see the parameter estimates in the listing that follows. */
proc syslin data=Fertility 2sls;
  endogenous morekids;
  instruments samesex;
  model weeksworked = morekids agemom black hispan othrace;
run;

The SAS System           12:10 Wednesday, February 12, 2014   3

                                            The SYSLIN Procedure
                                     Two-Stage Least Squares Estimation

                                      Model                 weeksworked
                                      Dependent Variable    weeksworked


                                            Analysis of Variance

                                                  Sum of        Mean
                   Source                 DF     Squares      Square    F Value    Pr > F

                   Model                   5    361252.8    72250.56     157.43    <.0001
                   Error               29994    13765597    458.9450
                   Corrected Total     29999    14441766


                           Root MSE            21.42300    R-Square       0.02557
                           Dependent Mean      19.20950    Adj R-Sq       0.02541
                           Coeff Var          111.52296


                                             Parameter Estimates

                                             Parameter    Standard
                    Variable           DF     Estimate       Error    t Value    Pr > |t|

                    Intercept           1     -1.35070    1.799170      -0.75      0.4528
                    morekids            1     -5.87095    3.702350      -1.59      0.1128
                    agemom              1     0.728474    0.036624      19.89      <.0001
                    black               1     10.86980    0.552211      19.68      <.0001
                    hispan              1     -1.26982    0.520349      -2.44      0.0147
                    othrace             1     3.171789    0.589436       5.38      <.0001