/*
In contrast to the dataset provided for ÜGK17, the scientific use file for
ÜGK16 available at FORSbase (version 1.0.0, 10.23662/FORS-DS-1004-1) does not
include a predefined variable for the socio-economic status (SES) of the
students. Also, it does not include multiply imputed data that address missing
values. Because one of the components of SES (i.e., parents' highest
educational attainment) has a substantial number of missing values, the
problem of missing values should be addressed before generating the variable
ses.

This script applies the procedure used for PISA for imputing the three
components used in the definition of SES and defines the variables ses and
qses as defined in the technical appendices to the ÜGK report.

Note: The resulting variables will not be identical to the ones used for the
national report.

Imputation follows:
PISA 2015 Technical Report, p. 339:
http://www.oecd.org/pisa/data/2015-technical-report/PISA2015_TechRep_Final-Chapter16.pdf
"For students with missing data on one out of the three components [for ÜGK:
highest education of parents, highest ISEI of parents, number of books at
home] the missing variable was imputed. Regression on the other two variables
was used to predict the third (missing) variable, and a random component was
added to the predicted value. If there were missing data on more than one
component, ESCS [for ÜGK: SES] was not computed and a missing value was
assigned for ESCS [for ÜGK: SES]."

Recoding of components and definition of SES follows:
Pham, G., Helbling, L., Verner, M., Petrucci, F., Angelone, D., &
Ambrosetti, A. (2019). ÜGK – COFO – VeCoF 2016 results: Technical appendices.
St.Gallen & Genf: Pädagogische Hochschule St.Gallen (PHSG) & Service de la
recherche en éducation (SRED), pp. 4-7.
http://uegk-schweiz.ch/wp-content/uploads/2019/06/UEGK16__Technical-appendices.pdf

Author: Simon Seiler, ICER, University of Bern
Version 1.2; 2021, February 24

Changelog:
* 1.2: hisei08 imputation using truncreg instead of regress
*/

version 16 // -> should work with older versions of Stata, except direct import of SPSS-data
clear all

// NOTE: machine-specific working directory; adjust to the folder that holds
// the FORSbase scientific use file.
cd "H:\Documents\uegk 2016 Daten def forsbase"

// Import SPSS-Data
import spss using "1004_UGK16_Data_E_v1.0.0.sav", clear

// according to Pham et al. 2019, "other" educational attainments have
// been treated as a missing code when combining mother's and father's
// educational attainment to the variable "fmedu". Obviously, this was not the
// case as fmedu is "other" if medu or fedu is missing and the other is "other".
// In order to avoid an inconsistent definition of fmedu (and, subsequently, ses)
// we set fmedu to missing here.
mvdecode fmedu, mv(8 = .)

// recode books to books5, according to Pham et al. 2019
recode books (1 2 = 0 "0-10 books")          ///
             (3   = 1 "11-50 books")         ///
             (4   = 2 "51-100 books")        ///
             (5   = 3 "101-250 books")       ///
             (6 7 = 4 "more than 250 books"), gen(books5)

// Number of missing SES components per student.
// This must be counted AFTER the recodes above and on the variables that are
// actually imputed: fmedu == 8 ("other") has just been set to missing, so
// counting earlier would let students with fmedu "other" plus one further
// missing component pass the "at most one component missing" filter below
// and get two components imputed, which contradicts the PISA rule.
// (Moving the count changes the imputation sample and therefore the random
// draws compared with earlier versions of this script.)
egen byte sesmissing = rowmiss(hisei08 fmedu books5)

// impute components for variable ses, according to PISA 2015 Technical Report
mi set wide
mi register imputed hisei08 fmedu books5

// fixed sort order and seed make the random draws reproducible
sort id_student
set seed 89564

// Only students with at most one missing component are imputed; the model is
// fitted separately by canton using the sampling weights.
mi impute chained                                                  ///
    (truncreg, ll(11.01) ul(88.96)) hisei08                        /// limits taken from harryganzeboom.nl/ISCO08/index.htm
    (ologit) fmedu books5                                          ///
    if sesmissing<2 [pw=smp_w_nrastubw], add(1) by(id_canton)

// imp_*: observed value where available, otherwise the value from the first
// (and only) imputation, which mi's wide style stores in _1_<varname>
foreach comp in hisei08 fmedu books5 {
    clonevar imp_`comp' = `comp'
    replace  imp_`comp' = _1_`comp' if `comp' >= . & _1_`comp' < .
}
// Leave the mi framework; everything below works on the plain dataset.
// NOTE(review): the tidy-up at the end refers to mi_miss and *_1_, presumably
// the names mi unset gives to mi's system/imputation variables -- confirm.
mi unset

// ---------------------------------------------------------------------------
// imputation flags for the three components
// ---------------------------------------------------------------------------
label define impflag            ///
    0 "not missing"             ///
    1 "missing, imputed"        ///
    2 "missing, not imputed"

foreach comp in hisei08 fmedu books5 {
    // 0 = observed, 1 = missing but filled in imp_*, 2 = missing in both
    generate byte impflag_`comp' = cond(`comp' < ., 0, cond(imp_`comp' < ., 1, 2))
    label values impflag_`comp' impflag
}

// ---------------------------------------------------------------------------
// standardize components (weighted mean and sd)
// ---------------------------------------------------------------------------
foreach comp in hisei08 fmedu books5 {
    summarize imp_`comp' [aw=smp_w_nrastubw]
    generate zimp_`comp' = (imp_`comp' - `r(mean)')/`r(sd)'
}

// ---------------------------------------------------------------------------
// ses
// ---------------------------------------------------------------------------
// - mean score; rowmean() skips missing values, so the if-condition restricts
//   it to students with all three standardized components
egen ses1 = rowmean(zimp_hisei08 zimp_fmedu zimp_books5) ///
    if zimp_hisei08 < . & zimp_fmedu < . & zimp_books5 < .

// - standardize ses
summarize ses1 [aw=smp_w_nrastubw]
generate ses = (ses1 - `r(mean)')/`r(sd)'

// - weighted quartiles of ses
xtile qses = ses [pw=smp_w_nrastubw], nquantiles(4)
label define qses               ///
    1 "SES: 1st quartile"       ///
    2 "SES: 2nd quartile"       ///
    3 "SES: 3rd quartile"       ///
    4 "SES: 4th quartile"
label values qses qses

// - overall imputation flag: 0 if every component was observed, 1 if at
//   least one component carries an imputed value, 2 otherwise
label define impflagses                                                         ///
    0 "SES based on complete information (no missings in components)"           ///
    1 "SES based on imputed information (one component missing but imputed)"    ///
    2 "SES missing (more than one component missing, not imputed)"
generate byte impflag_ses =                                                     ///
    cond(max(impflag_hisei08, impflag_fmedu, impflag_books5) == 0, 0,           ///
    cond(inlist(1, impflag_hisei08, impflag_fmedu, impflag_books5), 1, 2))
label values impflag_ses impflagses

// ---------------------------------------------------------------------------
// label variables
// ---------------------------------------------------------------------------
// books5 is labelled first because the label of imp_books5 is derived from it
label variable books5 "`: variable label books', recoded"
foreach comp in hisei08 fmedu books5 {
    label variable imp_`comp'     "`: variable label `comp'', imputed"
    label variable impflag_`comp' "flags imputed values in imp_`comp'"
}
label variable impflag_ses "flags imputed values in ses/qses"
label variable ses         "socioeconomic status (SES)"
label variable qses        "socioeconomic status (SES), quartiles"

// ---------------------------------------------------------------------------
// tidy-up: remove helper variables and move the new variables to the end
// ---------------------------------------------------------------------------
drop sesmissing *_1_ mi_miss zimp_* ses1
order books5                        ///
      imp_hisei08 impflag_hisei08   ///
      imp_fmedu   impflag_fmedu     ///
      imp_books5  impflag_books5    ///
      ses qses impflag_ses, last