library(tidyverse)
#install package
#install.packages("ivpack")
#load package
library(ivpack)

Dataset : card.data

Data from the National Longitudinal Survey of Young Men (NLSYM) that was used by Card (1995), see the description. Nous considrons comme - outcome la variable lwage - traitement la variable educ - variable instrumentale (IV) la variable near 4 year college On veut dterminer l’effet causal du niveau d’ducation sur le niveau de revenu.

Questions :

  • Charger les donnes et vrifier les types de variables
data(card.data)
glimpse(card.data)
Observations: 3,010
Variables: 35
$ id       <int> 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 22, 25, 26, 29, 30, 3...
$ nearc2   <int> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0...
$ nearc4   <int> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ educ     <int> 7, 12, 12, 11, 12, 12, 18, 14, 12, 12, 9, 12, 11, 11, 16, 14, 12, 14, 10, 12, 18, 18,...
$ age      <int> 29, 27, 34, 27, 34, 26, 33, 29, 28, 29, 28, 26, 24, 30, 31, 24, 34, 29, 26, 32, 32, 3...
$ fatheduc <int> NA, 8, 14, 11, 8, 9, 14, 14, 12, 12, 11, 11, 11, 11, NA, 15, 12, NA, 8, 8, 12, NA, 5,...
$ motheduc <int> NA, 8, 12, 12, 7, 12, 14, 14, 12, 12, 12, 6, 6, 6, 8, 12, 8, 12, 8, 8, 13, 8, 14, 12,...
$ weight   <dbl> 158413, 380166, 367470, 380166, 367470, 380166, 367470, 496635, 367772, 480445, 38016...
$ momdad14 <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ sinmom14 <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ step14   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ reg661   <int> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1...
$ reg662   <int> 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0...
$ reg663   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ reg664   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ reg665   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ reg666   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ reg667   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ reg668   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ reg669   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ south66  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ black    <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ smsa     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1...
$ south    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0...
$ smsa66   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ wage     <int> 548, 481, 721, 250, 729, 500, 565, 608, 425, 515, 225, 400, 417, 217, 894, 300, 346, ...
$ enroll   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ KWW      <int> 15, 35, 42, 25, 34, 38, 41, 46, 32, 34, 29, 34, 22, 27, 43, 36, 40, 35, 24, 35, 43, 5...
$ IQ       <int> NA, 93, 103, 88, 108, 85, 119, 108, 96, 97, 84, 89, 93, 74, 116, NA, 93, 100, 91, 88,...
$ married  <int> 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 6, 1, 1, 6, 1, 1, 1, 1, 1, 1, 1, 6, 5, 1...
$ libcrd14 <int> 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ exper    <int> 16, 9, 16, 10, 16, 8, 9, 9, 10, 11, 13, 8, 7, 13, 9, 4, 16, 9, 10, 14, 8, 10, 11, 10,...
$ lwage    <dbl> 6.306275, 6.175867, 6.580639, 5.521461, 6.591674, 6.214608, 6.336826, 6.410175, 6.052...
$ expersq  <int> 256, 81, 256, 100, 256, 64, 81, 81, 100, 121, 169, 64, 49, 169, 81, 16, 256, 81, 100,...
$ region   <dbl> 661, 661, 661, 662, 662, 662, 662, 662, 662, 662, 669, 662, 662, 662, 662, 662, 662, ...
  • Rprsenter graphiquement les distributions de l’outcome et du traitement

ggplot(card.data,aes(educ)) + geom_histogram()
Warning messages:
1: In native_encode(options$fig.path) :
  some characters may not work under the current locale
2: In native_encode(options$fig.path) :
  some characters may not work under the current locale
3: In native_encode(options$fig.path) :
  some characters may not work under the current locale
4: In native_encode(options$fig.path) :
  some characters may not work under the current locale
5: In native_encode(options$fig.path) :
  some characters may not work under the current locale
6: In native_encode(options$fig.path) :
  some characters may not work under the current locale

  • Vrifier la force de la variable instrumentale
card.data %>% group_by(nearc4) %>% summarise(mean(educ))
Warning messages:
1: In native_encode(options$fig.path) :
  some characters may not work under the current locale
2: In native_encode(options$fig.path) :
  some characters may not work under the current locale
3: In native_encode(options$fig.path) :
  some characters may not work under the current locale
  • Binariser l’IV avec un cut 12 et calculer la proportion de “compliers”
card.data = card.data %>% mutate(educ12 = educ>12)
Warning messages:
1: In native_encode(options$fig.path) :
  some characters may not work under the current locale
2: In native_encode(options$fig.path) :
  some characters may not work under the current locale
3: In native_encode(options$fig.path) :
  some characters may not work under the current locale
4: In native_encode(options$fig.path) :
  some characters may not work under the current locale
5: In native_encode(options$fig.path) :
  some characters may not work under the current locale
6: In native_encode(options$fig.path) :
  some characters may not work under the current locale
7: In native_encode(options$fig.path) :
  some characters may not work under the current locale
8: In native_encode(options$fig.path) :
  some characters may not work under the current locale
card.data %>% group_by(nearc4) %>% summarise(moy = mean(educ12)) 
propcomp = card.data %>% group_by(nearc4) %>% summarise(moy = mean(educ12)) %>% pull(moy) %*% c(-1,1)
propcomp
          [,1]
[1,] 0.1219293
  • Estimer l’ITT (effet de l’intention de traiter)
card.data %>% group_by(nearc4) %>% summarise(moy = mean(lwage)) 
Warning messages:
1: In native_encode(options$fig.path) :
  some characters may not work under the current locale
2: In native_encode(options$fig.path) :
  some characters may not work under the current locale
3: In native_encode(options$fig.path) :
  some characters may not work under the current locale
4: In native_encode(options$fig.path) :
  some characters may not work under the current locale
5: In native_encode(options$fig.path) :
  some characters may not work under the current locale
6: In native_encode(options$fig.path) :
  some characters may not work under the current locale
7: In native_encode(options$fig.path) :
  some characters may not work under the current locale
8: In native_encode(options$fig.path) :
  some characters may not work under the current locale
itt = card.data %>% group_by(nearc4) %>% summarise(moy = mean(lwage)) %>% pull(moy) %*% c(-1,1)
itt
          [,1]
[1,] 0.1559075
  • Estimer le CACE
CACE
         [,1]
[1,] 1.278672
  • Vrifier que l’on obtient la mme estimation avec la stratgie 2SLS
# Stage 1
s1<-lm(educ12~nearc4,card.data)
predtx <-predict(s1, type = "response")
table(predtx)
predtx
0.422152560083588  0.54408183146614 
              957              2053 
#Stage 2
lm(card.data$lwage~predtx)

Call:
lm(formula = card.data$lwage ~ predtx)

Coefficients:
(Intercept)       predtx  
      5.616        1.279  
  • Faire de mme en utilisant le package ivpack
ivmodel1=ivreg(lwage ~ educ12, ~ nearc4, x=TRUE, data=card.data)
summary(ivmodel1)

Call:
ivreg(formula = lwage ~ educ12 | nearc4, data = card.data, x = TRUE)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.28920 -0.53653 -0.03694  0.58481  2.04111 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   5.6157     0.1133  49.568  < 2e-16 ***
educ12TRUE    1.2787     0.2228   5.739 1.05e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6941 on 3008 degrees of freedom
Multiple R-Squared: -1.445, Adjusted R-squared: -1.446 
Wald test: 32.94 on 1 and 3008 DF,  p-value: 1.047e-08 
robust.se(ivmodel1)
[1] "Robust Standard Errors"

t test of coefficients:

            Estimate Std. Error t value  Pr(>|t|)    
(Intercept)  5.61570    0.11172 50.2637 < 2.2e-16 ***
educ12TRUE   1.27867    0.22036  5.8026  7.21e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  • Recommencer en prenant prenant en compte les variables exper, reg661, reg662, reg663, reg664, reg665, reg666, reg667 et reg668
ivmodel2=ivreg(lwage ~ educ12 + exper + reg661 + reg662 + reg663 + reg664 + 
                 reg665+ reg666 + reg667 + reg668,
               ~ nearc4 + exper + reg661+ reg662 + reg663 + reg664 +
                 reg665 + reg666 +reg667 + reg668, 
               x=TRUE, data=card.data)
There were 11 warnings (use warnings() to see them)
summary(ivmodel2)

Call:
ivreg(formula = lwage ~ educ12 + exper + reg661 + reg662 + reg663 + 
    reg664 + reg665 + reg666 + reg667 + reg668 | nearc4 + exper + 
    reg661 + reg662 + reg663 + reg664 + reg665 + reg666 + reg667 + 
    reg668, data = card.data, x = TRUE)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.55402 -0.39886 -0.01746  0.41183  1.69659 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  4.950242   0.366645  13.501  < 2e-16 ***
educ12TRUE   1.203660   0.307557   3.914 9.29e-05 ***
exper        0.081452   0.019639   4.148 3.45e-05 ***
reg661       0.009228   0.072198   0.128 0.898303    
reg662       0.092530   0.055866   1.656 0.097770 .  
reg663       0.109208   0.053864   2.027 0.042703 *  
reg664      -0.023602   0.061442  -0.384 0.700905    
reg665      -0.111628   0.065858  -1.695 0.090184 .  
reg666      -0.135042   0.070416  -1.918 0.055234 .  
reg667      -0.088088   0.068126  -1.293 0.196108    
reg668      -0.256740   0.074531  -3.445 0.000579 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5709 on 2999 degrees of freedom
Multiple R-Squared: -0.6493,    Adjusted R-squared: -0.6548 
Wald test: 17.45 on 10 and 2999 DF,  p-value: < 2.2e-16 
robust.se(ivmodel2)
[1] "Robust Standard Errors"

t test of coefficients:

             Estimate Std. Error t value  Pr(>|t|)    
(Intercept)  4.950242   0.363037 13.6357 < 2.2e-16 ***
educ12TRUE   1.203660   0.304549  3.9523 7.920e-05 ***
exper        0.081452   0.019441  4.1898 2.873e-05 ***
reg661       0.009228   0.072869  0.1266 0.8992345    
reg662       0.092530   0.056105  1.6492 0.0992065 .  
reg663       0.109208   0.054991  1.9859 0.0471314 *  
reg664      -0.023602   0.062978 -0.3748 0.7078594    
reg665      -0.111628   0.065945 -1.6927 0.0906084 .  
reg666      -0.135042   0.070452 -1.9168 0.0553593 .  
reg667      -0.088088   0.068514 -1.2857 0.1986517    
reg668      -0.256740   0.071859 -3.5729 0.0003587 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  • Y a-t-il un effet du niveau d’ducation sur le niveau de revenu ?
LS0tCnRpdGxlOiAiRXhlbXBsZXMgcG91ciBsYSBjYXVzYWxpdMOpIDogaW5zdHJ1bWVudGFsIHZhcmlhYmxlcyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCiNpbnN0YWxsIHBhY2thZ2UKI2luc3RhbGwucGFja2FnZXMoIml2cGFjayIpCiNsb2FkIHBhY2thZ2UKbGlicmFyeShpdnBhY2spCgpgYGAKCiMgRGF0YXNldCA6IGNhcmQuZGF0YQpEYXRhIGZyb20gdGhlIE5hdGlvbmFsIExvbmdpdHVkaW5hbCBTdXJ2ZXkgb2YgWW91bmcgTWVuIChOTFNZTSkgdGhhdCB3YXMgdXNlZCBieSBDYXJkICgxOTk1KSwgc2VlIHRoZSBkZXNjcmlwdGlvbi4gTm91cyBjb25zaWTDqXJvbnMgY29tbWUgCi0gb3V0Y29tZSBsYSB2YXJpYWJsZSBgbHdhZ2VgCi0gdHJhaXRlbWVudCBsYSB2YXJpYWJsZSBgZWR1Y2AKLSB2YXJpYWJsZSBpbnN0cnVtZW50YWxlIChJVikgbGEgdmFyaWFibGUgYG5lYXIgNCB5ZWFyIGNvbGxlZ2VgCk9uIHZldXQgZMOpdGVybWluZXIgbCdlZmZldCBjYXVzYWwgZHUgbml2ZWF1IGQnw6lkdWNhdGlvbiBzdXIgbGUgbml2ZWF1IGRlIHJldmVudS4KCiMjIFF1ZXN0aW9ucyA6IAotIENoYXJnZXIgbGVzIGRvbm7DqWVzIGV0IHbDqXJpZmllciBsZXMgdHlwZXMgZGUgdmFyaWFibGVzCmBgYHtyfQpkYXRhKGNhcmQuZGF0YSkKZ2xpbXBzZShjYXJkLmRhdGEpCmBgYAotIFLDqXByw6lzZW50ZXIgZ3JhcGhpcXVlbWVudCBsZXMgZGlzdHJpYnV0aW9ucyBkZSBsJ291dGNvbWUgZXQgZHUgdHJhaXRlbWVudApgYGB7cn0KZ2dwbG90KGNhcmQuZGF0YSxhZXMobHdhZ2UpKSArIGdlb21faGlzdG9ncmFtKCkKYGBgCmBgYHtyfQpnZ3Bsb3QoY2FyZC5kYXRhLGFlcyhlZHVjKSkgKyBnZW9tX2hpc3RvZ3JhbSgpCmBgYAotIFbDqXJpZmllciBsYSBmb3JjZSBkZSBsYSB2YXJpYWJsZSBpbnN0cnVtZW50YWxlCmBgYHtyfQpjYXJkLmRhdGEgJT4lIGdyb3VwX2J5KG5lYXJjNCkgJT4lIHN1bW1hcmlzZShtZWFuKGVkdWMpKQpgYGAKLSBCaW5hcmlzZXIgbCdJViBhdmVjIHVuIGN1dCDDoCAxMiBldCBjYWxjdWxlciBsYSBwcm9wb3J0aW9uIGRlICJjb21wbGllcnMiCmBgYHtyfQpjYXJkLmRhdGEgPSBjYXJkLmRhdGEgJT4lIG11dGF0ZShlZHVjMTIgPSBlZHVjPjEyKQpjYXJkLmRhdGEgJT4lIGdyb3VwX2J5KG5lYXJjNCkgJT4lIHN1bW1hcmlzZShtb3kgPSBtZWFuKGVkdWMxMikpIApwcm9wY29tcCA9IGNhcmQuZGF0YSAlPiUgZ3JvdXBfYnkobmVhcmM0KSAlPiUgc3VtbWFyaXNlKG1veSA9IG1lYW4oZWR1YzEyKSkgJT4lIHB1bGwobW95KSAlKiUgYygtMSwxKQpwcm9wY29tcApgYGAKLSBFc3RpbWVyIGwnSVRUIChlZmZldCBkZSBsJ2ludGVudGlvbiBkZSB0cmFpdGVyKQpgYGB7cn0KY2FyZC5kYXRhICU+JSBncm91cF9ieShuZWFyYzQpICU+JSBzdW1tYXJpc2UobW95ID0gbWVhbihsd2FnZSkpIAppdHQgPSBjYXJkLmRhdGEgJT4lIGdyb3VwX2J5KG5lYXJjNCkgJT4lIHN1bW1hcmlzZShtb3kgPSBtZWFuKGx3YWdlKSkgJT4lIHB1bGwobW95KSAlKiUgYygtMSwxKQppdHQKYGBgCgotIEVzdGltZXIgbGUgQ0FDRQpgYGB7cn0KQ0FDRSA9IGl0dC9wcm9wY29tcApDQUNFCmBgYAoKLSBWw6lyaWZpZXIgcXVlIGwnb24gb2J0aWVudCBsYSBtw6ptZSBlc3RpbWF0aW9uIGF2ZWMgbGEgc3RyYXTDqWdpZSAyU0xTCmBgYHtyfQojIFN0YWdlIDEKczE8LWxtKGVkdWMxMn5uZWFyYzQsY2FyZC5kYXRhKQpwcmVkdHggPC1wcmVkaWN0KHMxLCB0eXBlID0gInJlc3BvbnNlIikKdGFibGUocHJlZHR4KQoKI1N0YWdlIDIKbG0oY2FyZC5kYXRhJGx3YWdlfnByZWR0eCkKCmBgYAoKLSBGYWlyZSBkZSBtw6ptZSBlbiB1dGlsaXNhbnQgbGUgcGFja2FnZSBgaXZwYWNrYApgYGB7cn0KaXZtb2RlbDE9aXZyZWcobHdhZ2UgfiBlZHVjMTIsIH4gbmVhcmM0LCB4PVRSVUUsIGRhdGE9Y2FyZC5kYXRhKQpzdW1tYXJ5KGl2bW9kZWwxKQpyb2J1c3Quc2UoaXZtb2RlbDEpCmBgYAoKLSBSZWNvbW1lbmNlciBlbiBwcmVuYW50IHByZW5hbnQgZW4gY29tcHRlIGxlcyB2YXJpYWJsZXMgYGV4cGVyYCwgYHJlZzY2MWAsIGByZWc2NjJgLCBgcmVnNjYzYCwgYHJlZzY2NGAsIGByZWc2NjVgLCBgcmVnNjY2YCwgYHJlZzY2N2AgZXQgYHJlZzY2OGAKYGBge3J9Cml2bW9kZWwyPWl2cmVnKGx3YWdlIH4gZWR1YzEyICsgZXhwZXIgKyByZWc2NjEgKyByZWc2NjIgKyByZWc2NjMgKyByZWc2NjQgKyAKICAgICAgICAgICAgICAgICByZWc2NjUrIHJlZzY2NiArIHJlZzY2NyArIHJlZzY2OCwKICAgICAgICAgICAgICAgfiBuZWFyYzQgKyBleHBlciArIHJlZzY2MSsgcmVnNjYyICsgcmVnNjYzICsgcmVnNjY0ICsKICAgICAgICAgICAgICAgICByZWc2NjUgKyByZWc2NjYgK3JlZzY2NyArIHJlZzY2OCwgCiAgICAgICAgICAgICAgIHg9VFJVRSwgZGF0YT1jYXJkLmRhdGEpCnN1bW1hcnkoaXZtb2RlbDIpCnJvYnVzdC5zZShpdm1vZGVsMikKYGBgCi0gWSBhLXQtaWwgdW4gZWZmZXQgZHUgbml2ZWF1IGQnw6lkdWNhdGlvbiBzdXIgbGUgbml2ZWF1IGRlIHJldmVudSA/CgoKCg==