/*Kaz SAS juku course*/
/*www.estat.us*/
/*PROC STANDARD: creating Z-scores and imputing missing values*/
/*Build the practice data set ABC from SASHELP.PRDSALE, a sample table
  that ships with SAS.*/
Data ABC;
  set sashelp.Prdsale;
  /*One uniform(0,1) draw per row; the fixed seed (3) makes the sample
    reproducible from run to run.*/
  X = ranuni(3);
  /*The source table is big, so keep only the rows whose draw is below 0.1.*/
  if X >= .1 then delete;
  /*For demonstration's sake, blank out both measures on the rows whose
    draw is below 0.07 so the later steps have missing values to handle.*/
  if X < .07 then call missing(ACTUAL, PREDICT);
run;
/*List the sampled rows, including the deliberately missing values.*/
title "Original data set";
proc print data=ABC;
  var REGION ACTUAL PREDICT;
run;
/*Z-scores: rescale ACTUAL and PREDICT to mean 0 and standard deviation 1.
  MEAN= and STD= accept other target values if a different scale is wanted.
  REPLACE is not specified here, so missing values stay missing in ABC2.*/
proc standard
    mean=0 std=1
    data=ABC
    out=ABC2;
  var ACTUAL PREDICT;
run;
/*List the standardized values; rows that were missing are still missing.*/
title "Z-score, missing values not imputed";
proc print data=ABC2;
  var REGION ACTUAL PREDICT;
run;
/*Same standardization, now with REPLACE so missing values are imputed
  with the mean. Because MEAN=0 is also given, the value filled in is 0,
  i.e. the overall mean expressed on the standardized scale.*/
proc standard
    mean=0 std=1 REPLACE
    data=ABC
    out=ABC3;
  var ACTUAL PREDICT;
run;
/*List the standardized values with the former missing cells filled in.*/
title "Z-score, missing values imputed with grand mean";
proc print data=ABC3;
  var REGION ACTUAL PREDICT;
run;
/*Standardize within groups*/
/*BY-group processing needs the input sorted by the BY variable, so sort
  first. The sorted copy is written to its own data set (ABC_SORTED) rather
  than to ABC2, so the non-imputed Z-scores stored in ABC2 by the first
  PROC STANDARD step are not overwritten.*/
proc sort data=ABC out=ABC_SORTED;
  by REGION;
run;
/*Within-group standardization: each REGION is rescaled separately to
  mean 0 and standard deviation 1. REPLACE imputes missing values with the
  group mean; because MEAN=0 is given, the value filled in is 0, i.e. the
  group mean expressed on the standardized scale.*/
proc standard data=ABC_SORTED out=ABC4 mean=0 std=1 REPLACE;
  var ACTUAL PREDICT;
  by REGION;
run;
/*List the within-region standardized values with missing cells filled in.*/
title "Z-score, missing values imputed with group mean";
proc print data=ABC4;
  var REGION ACTUAL PREDICT;
run;