#delim ;

* ---------------------------------------------------------------------------;
* dfregcv: double entry DF regression for twin data with optional covariates;
* (DF presumably stands for DeFries-Fulker - confirm against the help file);
*;
* Syntax:;
*   dfregcv depv1 depv2 [if] [in], twpairv(varname) twentry(varname);
*           mzdummy(varname) rdz(real);
*           [dfmeth(varest|noadjust|times2|gmm) covars(varlist) keepvar;
*            level(int)];
*;
* Called without arguments after a previous dfregcv run, the command;
* redisplays the stored results;
* ---------------------------------------------------------------------------;

program define dfregcv, eclass;
    version 6.0;
    if replay() {;
        * replay is only valid when the last estimates come from dfregcv;
        if "`e(cmd)'" != "dfregcv" {;
            error 301;
        };
        Replay `0';
    };
    else Estimate `0';
end;

* ---------------------------------------------------------------------------;
* Estimate: validates the input, runs the regression of depv1 on depv2, the;
* relatedness coefficient, their interaction, the covariates and a constant,;
* and posts coefficients with the variance selected by dfmeth():;
*   noadjust  plain OLS variance;
*   times2    OLS variance multiplied by two;
*   varest    sandwich variance built from residual moments summed by pair;
*   gmm       GMM estimator using MZ and DZ strata (needs external matginv);
* Returns 198 for an unknown dfmeth() and 459 when the data checks fail;
* ---------------------------------------------------------------------------;

program define Estimate, eclass;
    syntax varlist(min=2 max=2 numeric) [if] [in],
        twpairv(varname) twentry(varname) mzdummy(varname) rdz(real)
        [dfmeth(string) covars(varlist) keepvar Level(int $S_level)];

    if "`dfmeth'" == "" {;
        local dfmeth "varest";
    };

    if "`dfmeth'" != "varest" & "`dfmeth'" != "noadjust"
        & "`dfmeth'" != "times2" & "`dfmeth'" != "gmm" {;
        di in red "Option dfmeth must be one of the following (default: varest):";
        di in ye " noadjust " in gr "(no adjustment of double entry results)";
        di in ye " varest " in gr "(asympt var estimation)";
        di in ye " times2 " in gr "(variance times two)";
        di in ye " gmm " in gr "(gmm estimator)";
        * return a non-zero code so that callers can detect the failure;
        exit 198;
    };

    marksample touse;
    tokenize `varlist';
    local depv1 `1';
    local depv2 `2';

    * all DF work variables are created on a preserved copy of the data;
    preserve;
    cap drop DF*;
    cap drop eDF*;
    cap drop mDF*;

    * test double entry data: every pair must have exactly two entries,;
    * so BOTH the smallest and the largest pair count have to equal 2;
    qui egen DFtwcnt = count(`twentry') if `touse', by(`twpairv');
    qui su DFtwcnt if `touse';
    if r(min) != 2 | r(max) != 2 {;
        di in red "Data is NOT double entry data";
        exit 459;
    };

    * test mzdummy is zero or one for every observation in the sample;
    qui g DFmztst = (`mzdummy' == 0) | (`mzdummy' == 1) if `touse';
    qui su DFmztst if `touse';
    if r(min) != 1 | r(max) != 1 {;
        di in red "Dummy for MZ twins (`mzdummy') is not correct";
        exit 459;
    };

    * drop matrices left over from an earlier run;
    dfdrmat;

    * generate variables;
    * DFr is 1 for MZ pairs and rdz for DZ pairs;
    qui g DFy = `depv1' if `touse';
    qui g DFc2 = `depv2' if `touse' ;
    qui g DFr = (`mzdummy' == 0) * `rdz' + (`mzdummy' == 1) if `touse' ;
    qui g DFh2 = DFr * `depv2' if `touse' ;
    qui g DFcns = 1 if `touse';

    * copy covariates to DFcov1, DFcov2, ... in the order given;
    if "`covars'" != "" {;
        local i = 1;
        tokenize `covars';
        while "``i''" != "" {;
            qui g DFcov`i' = ``i'' if `touse';
            local i = `i' + 1;
        };
        unab DFcovs: DFcov*;
    };

    * DF regression, no adjustment;
    qui regress DFy DFc2 DFr DFh2 `DFcovs' DFcns if `touse', noconst;
    qui predict DFeps if e(sample), resid;

    * get coef matrix and the summary statistics reported by Replay;
    mat DFb0 = e(b);
    mat DFV0 = e(V);
    local DFnobs = e(N);
    local mss = e(mss);
    local df_m = e(df_m);
    local rss = e(rss);
    local df_r = e(df_r);
    local r2 = e(r2);

    * estimates without adjustment;
    if "`dfmeth'" == "noadjust" {;
        mat DFb0n = DFb0;
        mat DFV0n = DFV0;
        est post DFb0n DFV0n, dep("DFy") obs(`DFnobs') esample(`touse');
    };

    * estimates with variance times two (std errors times sqrt(2));
    if "`dfmeth'" == "times2" {;
        mat DFb0t = DFb0;
        mat DFV0t = 2 * DFV0;
        est post DFb0t DFV0t, dep("DFy") obs(`DFnobs') esample(`touse');
    };

    * estimates with adjusted standard errors;
    if "`dfmeth'" == "varest" {;
        * interaction with epsilon;
        for Z in var DFc2 DFr DFh2 `DFcovs' DFcns, noheader: qui g eZ = Z * DFeps if `touse';
        * sum up observations within each twin pair;
        for Z in var DFc2 DFr DFh2 `DFcovs' DFcns, noheader: qui egen mZ = sum(eZ) if `touse', by(`twpairv');
        if "`covars'" != "" {;
            unab mDFcovs: mDFcov*;
        };
        * accum interaction terms;
        * (note: the 1/2 below is necessary because each obs is accum twice);
        qui mat acc DFxeex2 = mDFc2 mDFr mDFh2 `mDFcovs' mDFcns if `touse', noconst;
        mat DFxeex = 1/2 * DFxeex2;
        * accum xxs;
        qui mat acc DFxx = DFc2 DFr DFh2 `DFcovs' DFcns if `touse', noconst;
        mat DFxxi = syminv(DFxx);
        mat DFb0a = DFb0;
        * sandwich form: inv(XX) * meat * inv(XX);
        mat DFV0a = DFxxi * DFxeex * DFxxi;
        est post DFb0a DFV0a, dep("DFy") obs(`DFnobs') esample(`touse');
    };

    * GMM estimation;
    if "`dfmeth'" == "gmm" {;
        * count mz and dz twins in each strata (double entry!);
        qui su `mzdummy' if `touse';
        mat DFnod = r(N) - r(sum) ;
        mat DFnom = r(sum) ;
        * interaction with epsilon;
        for Z in var DFc2 DFr DFh2 `DFcovs' DFcns, noheader: qui g eZ = Z * DFeps if `touse';
        * sum up observations within each twin pair;
        for Z in var DFc2 DFr DFh2 `DFcovs' DFcns, noheader: qui egen mZ = sum(eZ) if `touse', by(`twpairv');
        if "`covars'" != "" {;
            unab mDFcovs: mDFcov*;
        };
        * estimate moment correlations by strata;
        qui mat acc DFomd = mDFc2 mDFr mDFh2 `mDFcovs' mDFcns if `touse' & (`mzdummy' == 0), noconst ;
        mat DFomd = 1/2 * DFomd ;
        qui mat acc DFomm = mDFc2 mDFr mDFh2 `mDFcovs' mDFcns if `touse' & (`mzdummy' == 1), noconst ;
        mat DFomm = 1/2 * DFomm;
        * calculate inverse in gamma;
        * matginv is not defined in this file and must be installed separately;
        matginv DFomd, ginv(DFoid) ;
        matginv DFomm, ginv(DFoim) ;
        * accumulate the cross products by strata;
        * row and column 1 of the accumulated matrix belong to DFy;
        qui mat acc DFzxd = DFy DFc2 DFr DFh2 `DFcovs' DFcns if `touse' & (`mzdummy' == 0), noconst ;
        mat DFxxd = DFzxd[2...,2...] ;
        mat DFxyd = DFzxd[2...,1] ;
        qui mat acc DFzxm = DFy DFc2 DFr DFh2 `DFcovs' DFcns if `touse' & (`mzdummy' == 1), noconst ;
        mat DFxxm = DFzxm[2...,2...] ;
        mat DFxym = DFzxm[2...,1] ;
        mat DFxxall = DFxxd * DFoid * DFxxd + DFxxm * DFoim * DFxxm;
        mat DFxyall = DFxxd * DFoid * DFxyd + DFxxm * DFoim * DFxym;
        * calculate beta;
        mat DFxxai = syminv(DFxxall);
        mat DFbgmm = DFxxai * DFxyall;
        mat DFbgmm = DFbgmm';
        * calculate variance;
        mat DFvgmm = DFxxai;
        est post DFbgmm DFvgmm, dep("DFy") obs(`DFnobs') esample(`touse');
    };

    * without keepvar the original data come back and work matrices go,;
    * with keepvar the DF work variables stay in memory;
    if "`keepvar'" == "" {;
        restore;
        dfdrmat;
    };
    else {;
        restore, not;
    };

    * post estimation info;
    est local cmd "dfregcv";
    est local depvar "`varlist'";
    est local twentry "`twentry'";
    est local mzdummy "`mzdummy'";
    est local covars "`covars'";
    est local dfmeth "`dfmeth'";
    est scalar rdz = `rdz';
    * the OLS summary statistics are not reported for the gmm estimator;
    if "`dfmeth'" != "gmm" {;
        est scalar mss = `mss';
        est scalar df_m = `df_m';
        est scalar rss = `rss';
        est scalar df_r = `df_r';
        est scalar r2 = `r2';
    };

    * hand the requested confidence level on to the display routine;
    Replay, level(`level');
end;

* ---------------------------------------------------------------------------;
* Replay: displays the header, the covariate key, the ANOVA style summary;
* (non-gmm methods only) and the coefficient table from the stored e();
* results. Option level() sets the confidence level of the table;
* ---------------------------------------------------------------------------;

program define Replay;
    syntax [, Level(int $S_level)];

    di in gr "**************************************************************";
    di "";
    di "DOUBLE ENTRY DF ESTIMATION:";
    di "";
    di in gr "dependent variables: `e(depvar)'";
    * NOTE(review): the label says twin-pair identifier but the value shown;
    * is e(twentry) - twpairv() is never stored in e(), confirm intent;
    di in gr "twin-pair identifier: `e(twentry)'";
    di in gr "MZ dummy: `e(mzdummy)'";
    di in gr "genetic overlap DZ twins: `e(rdz)'";
    di in gr "Method: " in re "`e(dfmeth)'";
    di "";
    di in gr "Covariates";

    * map the generic DFcov names in the table back to the user variables;
    local i = 1;
    tokenize `e(covars)';
    while "``i''" != "" {;
        local ccv "``i''";
        di in gr "DFcov`i' equals " in ye "`ccv'";
        local i = `i' + 1;
    };

    * for non-gmm models;
    if "`e(dfmeth)'" != "gmm" {;
        di in gr _n " Source | SS df MS " _col(54) "# twin pairs = " in ye %9.0f e(N)/2 ;
        di in gr "---------+------------------------------" _col(54) "# twins (N) = " in ye %9.0f e(N);
        di in gr " Model | " in ye %11.0g e(mss) " " %5.0f e(df_m) " " %11.0g e(mss)/e(df_m) ;
        di in gr "Residual | " in ye %11.0g e(rss) " " %5.0f e(df_r) " " %11.0g e(rss)/e(df_r) in gr _col(54) "R-squared = " in ye %9.4f e(r2) ;
        di in gr "---------+------------------------------" ;
        * total mean square is total SS over total degrees of freedom;
        di in gr " Total | " in ye %11.0g e(mss)+e(rss) " " %5.0f e(df_m)+e(df_r) " " %11.0g (e(mss)+e(rss))/(e(df_m)+e(df_r)) ;
    };
    else {;
        di in gr _col(54) "# twin pairs = " in ye %9.0f e(N)/2 ;
        di in gr _col(54) "# twins (N) = " in ye %9.0f e(N);
    };

    di "";
    if "`e(dfmeth)'" == "noadjust" {;
        di in gr "Standard errors of double entry DF analysis WITHOUT adjustment:";
    };
    if "`e(dfmeth)'" == "times2" {;
        di in gr "Standard errors of double entry DF analysis multiplied by sqrt(2):";
    };
    if "`e(dfmeth)'" == "varest" {;
        di in gr "Asymptotically correct std. errors:";
    };
    if "`e(dfmeth)'" == "gmm" {;
        di in gr "Efficient GMM estimation:";
    };

    est dis , level(`level');
end;

* ---------------------------------------------------------------------------;
* dfdrmat: drops the work matrices used by Estimate. Each drop is wrapped in;
* capture, so matrices that do not exist are skipped without error;
* ---------------------------------------------------------------------------;

prog define dfdrmat;
    local mats "DFb0 DFV0 DFxeex2 DFxeex DFxx DFxxi DFxxall DFxxai DFxyall";
    local mats "`mats' DFnod DFnom DFoid DFoim DFomd DFomm";
    local mats "`mats' DFxxd DFxxm DFxyd DFxym DFzxd DFzxm";
    tokenize `mats';
    local i = 1;
    while "``i''" != "" {;
        cap mat drop ``i'';
        local i = `i' + 1;
    };
end;