/*=====================================================================
  Purpose
    1. Recode here.finaldata into WORK.finaldata: labelled categories,
       0/1 dummy variables and missing-data flags.
    2. Produce weighted descriptive statistics by gender for all cases
       (prefix NELS) and for the complete-case analytical sample
       (valid2="valid").
    3. Reshape the PROC MEANS output into one row per analysis variable
       and export it to an Excel workbook.

  Note on disabling macro calls
    A macro call placed in an asterisk-style comment is still expanded
    by the macro processor, and only the first generated statement ends
    up commented out. Every disabled macro call in this program is
    therefore wrapped in a block comment instead.
=====================================================================*/

/* Specifying libraries */
%let direc=C:\AAA PROJECTS\ECBW\N2P;
/* Destination workbook for the final PROC EXPORT */
%let outfile=C:\Documents and Settings\kuekawa\My Documents\tables.xls;

libname here "&direc";
/* NOTE(review): the period after direc ends the macro variable name,
   so this path resolves to ...\N2Pdata7 (no backslash before data7).
   Confirm that this is the intended directory. The libref is not used
   anywhere in this program. */
libname there "&direc.data7";

options spool;

/* Deletion flag for character variables: generates DATA step
   statements that set miss<var2> to 1 when <var1> is blank and to 0
   otherwise. Must be called inside a DATA step.
     var1 = character variable to test
     var2 = numeric suffix of the flag variable                       */
%macro nan (var1=, var2=);
    miss&var2 = 0;
    if &var1 = "" then miss&var2 = 1;
%mend nan;

data finaldata;
    set here.finaldata;

    /* Subsetting IF: keep only cases whose weight is above -9
       (missing weights also fail this test) */
    if F1weight > -9;

    /* Just last minute variable recoding */
    /* Get rid of the commitment-to-work variable */
    drop commit;

    /* The first assignment fixes the length of race2, so "Hispanic"
       (the longest label) has to stay first */
    if race=2 then race2="Hispanic";
    if race=1 then race2="Asian";
    if race=3 then race2="Black";
    if race=4 then race2="White";
    if race=5 then race2="Native";

    /* Creating TS_talkA */
    /* Oct 2002, following Charles's hunch, I am using the original
       coding here */
    if TS_talk2=2 then TS_talkA="2S talk two Ts";
    if TS_talk2=1 then TS_talkA="1S talk one T";
    if TS_talk2=0 then TS_talkA="3S talk zero T";
    /* Disabled: if TS_talk2 < -999 then TS_talkA="4missing"; */

    /* Closure collapsed into three levels (0.5 is grouped with level 1,
       not with level 0) */
    closureX=closure;
    if closure = 0 then CLOSE=0;
    if closure in (.5, 1, 1.5) then CLOSE=1;
    if closure in (2, 2.5, 3, 3.5, 4) then CLOSE=2;

    /* These indicators are 1 or missing, never 0 */
    if CLOSE=0 then CLOSE0=1;
    if CLOSE=1 then CLOSE1=1;
    if CLOSE=2 then CLOSE2=1;

    /* Risk factor: dummies are defined only for BYRISK between 0 and 6 */
    if BYRISK >= 0 and BYRISK <= 6 then do;
        RISK0=0;
        RISK1=0;
    end;
    if BYRISK = 0 then RISK0=1;
    if BYRISK > 0 and BYRISK <= 6 then RISK1=1;

    /* Deletion flags due to missing cases */
    /* Tedious because these are string variables */
    %nan (var2=1,  var1=gender);
    %nan (var2=2,  var1=race2);
    %nan (var2=3,  var1=urban);
    %nan (var2=4,  var1=F1schooltype);
    %nan (var2=5,  var1=F1palvalue);
    %nan (var2=6,  var1=F1clubtime);
    /* Disabled, I took this out because it came from the F2 cycle:
       %nan (var2=7, var1=F1goodteacher); */
    %nan (var2=8,  var1=F1getalong);
    %nan (var2=9,  var1=F1tutor);
    %nan (var2=10, var1=TS_talkA);
    %nan (var2=11, var1=F1famact);

    /* X1 = number of blank character variables */
    X1=sum(of miss1 miss2 miss3 miss4 miss5 miss6 /*miss7*/
              miss8 miss9 miss10 miss11);
    drop miss1 miss2 miss3 miss4 miss5 miss6 /*miss7*/
         miss8 miss9 miss10 miss11;

    /* X2 = number of missing numeric variables (easy, they are numeric) */
    X2=nmiss(of F1compo_score f1ses F1G10enrol parentschool closure
                /* commitR*/);

    /* A case is valid only when nothing is missing */
    X3=X1+X2;
    valid=0;
    if X3=0 then valid=1;

    /* Just the student-teacher tie information is missing */
    nodata=0;
    if TS_talk2 < -999 then nodata=1;

    /* Data missing: the reverse of valid */
    if valid=1 then nodata2=0;
    if valid=0 then nodata2=1;

    whole="whole";

    /* Creating 1/0 dummy variables for descriptive statistics, so that
       PROC MEANS can be used instead of PROC FREQ */
    if race > -9 then do;
        hispanic=0; asian=0; black=0; white=0; native=0;
        if race=2 then hispanic=1;
        if race=1 then asian=1;
        if race=3 then black=1;
        if race=4 then white=1;
        if race=5 then native=1;
    end;

    misrace=0;
    if race < -9 then misrace=1;

    urban2=0; rural=0; suburb=0;
    if urban="(Urb)" then urban2=1;
    if urban="(Rur)" then rural=1;
    if urban="(Sub)" then suburb=1;

    if BYgetalong="S not get along with T" then BYgetalong2=0;
    if BYgetalong="S get along with T"     then BYgetalong2=1;
    if F1getalong="S not get along with T" then F1getalong2=0;
    if F1getalong="S get along with T"     then F1getalong2=1;

    /* Talk dummies stay missing when TS_talkA is blank */
    if TS_talkA ne "" then do;
        talk0=0; talk1=0; talk2=0;
        if TS_talkA="3S talk zero T" then talk0=1;
        if TS_talkA="1S talk one T"  then talk1=1;
        if TS_talkA="2S talk two Ts" then talk2=1;
    end;

    private=0; catholic=0; public=0;
    if F1schooltype="1Private"  then private=1;
    if F1schooltype="2Catholic" then catholic=1;
    if F1schooltype="3Public"   then public=1;

    if F1famact="2FamTalk No"  then F1famact2=0;
    if F1famact="1FamTalk Yes" then F1famact2=1;

    /* NOTE(review): F1palvalue is compared with the same "FamTalk"
       labels as F1famact; confirm these are the actual values of
       F1palvalue and not a copy-paste leftover. */
    if F1palvalue="2FamTalk No"  then F1palvalue2=0;
    if F1palvalue="1FamTalk Yes" then F1palvalue2=1;
run;

data finaldata;
    set finaldata;

    /* NOTE(review): X is not created in this program; presumably it
       comes from here.finaldata. Confirm. */
    if X=1 then closurerank2="highclose";
    if X=0 then closurerank2="lowclose";

    /* "not valid" is assigned first so that valid2 is long enough */
    if valid=0 then valid2="not valid";
    if valid=1 then valid2="valid";

    changeCompo_score=F1compo_score-Bycompo_score;
run;

%let vars=
    F1problem BYproblem BF1devRlog F1college BYcollege
    asian black hispanic native
    F1ses F1compo_score
    rural suburb urban2
    F1G10enrol private catholic
    F1famact2 F1palvalue2 F1clubtime parentschool
    F1getalong2 F1tutor
    talk0 talk1 talk2
    closure
    late cut skip cheat copy fight gang racist sexist steal destroy
    smoke drink drug weapon abuse talkback disobey;

/* First I get descriptive statistics of the NELS data, the whole thing,
   for the variables used in my analysis. After this, I get the same
   statistics for my analytical sample, the data that I actually used.
   I wanted to compare the whole sample with the smaller data that I
   used. The data became small because of missing cases. The rate of
   missing was very severe, so I wanted to compare the two samples. */
proc means data=finaldata mean std min max n stderr;
    class gender;
    weight F1weight;
    var &vars;
    ods output summary=whole;
run;

/* Prefix the gender value with NELS so that these rows get their own
   column names after transposing */
data whole;
    set whole;
    length ID $ 10;
    x="NELS";
    ID=x||gender;
    drop gender;
run;

/* Get descriptive statistics for subgroups within the data; in this
   case, the boys' and girls' samples.
   The WHERE statement on valid2 picks the cases that have no missing
   values; see how valid2 is constructed above.
     var1 = class variable; also used as the name of the output data set
     and  = accepted but not used by the macro body                   */
%macro jones (var1=, and=);
    proc means data=finaldata mean std min max n stderr;
        where valid2="valid";
        class &var1;
        weight F1weight;
        var &vars;
        ods output summary=&var1;
    run;

    data &var1;
        set &var1;
        length ID $ 10;
        ID=&var1;
        drop &var1;
    run;
%mend jones;

/* Disabled: %jones (var1=valid2); */
%jones (var1=gender);
/* Disabled: %jones (var1=race2); */
/* Disabled: %jones (var1=urban); */
/* Disabled: %jones (var1=closurerank2); */
/* Disabled: %jones (var1=ts_talkA); */
/* Disabled: %jones (var1=F1schooltype); */

/* Merging the two result data sets */
data allthese;
    set whole gender;
run;

/* Transpose to get the data into a shape: one column per ID value */
proc transpose data=allthese out=alltheseT;
    id ID;
run;

/* Lots of manipulation to put the data into a shape that is ready to go
   into an Excel sheet. The statistic suffix is stripped from _NAME_ so
   that all statistics of one analysis variable share the same group. */
data alltheseT2;
    set alltheseT;
    x=_name_;
    group=tranwrd(x,"_Mean","");
    group=tranwrd(group,"_StdDev","");
    group=tranwrd(group,"_Min","");
    group=tranwrd(group,"_N","");
    group=tranwrd(group,"_Max","");
    group=tranwrd(group,"_StdErr","");
run;

/* I do proc print once in a while to see what the data looks like */
proc print data=alltheseT2;
run;

proc print data=alltheseT;
    where _label_="Mean" or _label_="Std Dev" or _label_="N Obs";
run;

/* Sometimes I have to do a complex way of PROC TRANSPOSE.
   I don't enjoy this sort of programming.
   Turns one column of alltheseT2 into a data set named after that
   column, with one row per group and the statistics renamed with the
   column name as a prefix.
     var1 = column of alltheseT2 (an ID value from the transpose)     */
%macro john (var1=);
    proc transpose data=alltheseT2 out=&var1;
        by group notsorted;
        id _label_;
        var &var1;
    run;

    data &var1;
        set &var1;
        keep group &var1.mean &var1.max &var1.min &var1.stdev
             &var1.N &var1.std_error;

        /* For 0/1 variables the standard error is recomputed from the
           proportion: sqrt(p*(1-p)/N) */
        if minimum=0 and maximum=1 then do;
            x=(mean*(1-mean));
            Std_Error=sqrt(x/(N));
        end;

        &var1.mean=mean;
        &var1.std_error=Std_Error;
        &var1.max=maximum;
        &var1.min=minimum;
        &var1.stdev=std_dev;
        &var1.N=N;
    run;
%mend john;

%john (var1=_BOY_);
%john (var1=_GIRL_);
%john (var1=NELS_BOY_);
%john (var1=NELS_GIRL_);

data descriptivestat;
    /* A RETAIN statement placed before MERGE changes the order of the
       variables in the data set. In this case I did not need it, so I
       did not use it. */
    /* Disabled:
       retain NELS_BOY_mean NELS_BOY_std_error NELS_BOY_max NELS_BOY_min
              NELS_BOY_stdev NELS_BOY_N
              _BOY_mean _BOY_std_error _BOY_max _BOY_min _BOY_stdev _BOY_N
              NELS_GIRL_mean NELS_GIRL_std_error NELS_GIRL_max
              NELS_GIRL_min NELS_GIRL_stdev NELS_GIRL_N
              _GIRL_mean _GIRL_std_error _GIRL_max _GIRL_min _GIRL_stdev
              _GIRL_N; */

    /* There is no BY statement, so rows are matched by position. The
       four inputs are built from alltheseT2 with the same BY group
       NOTSORTED, so their rows are expected to be in the same group
       order. */
    merge NELS_BOY_ _BOY_ NELS_GIRL_ _GIRL_;
    /* Disabled: by group; */
run;

proc print data=descriptivestat;
run;

proc export data=work.descriptivestat
    outfile="&outfile"
    dbms=EXCEL2000 replace;
run;