Posts – Page 49 – My Statistical tools

November 26, 2012

Saving log and output files into external text files (to avoid stopping of a SAS run)

/* Send procedure output and the SAS log to external text files.        */
/* Each fileref points at the file whose name matches what it receives: */
/*   printout -> procedure (print) output,  logout -> SAS log.          */
filename printout 'C:\temp\output.txt';
filename logout 'C:\temp\log.txt';

/* NEW replaces the contents of both files instead of appending. */
proc printto print=printout log=logout new;
run;

/* Calling PROC PRINTTO with no options restores the default destinations. */
proc printto ;
run;

November 21, 2012 (updated November 21, 2012)

SAS Format examples

Examples of format statements

/* Numeric date variable shown with the DATE9. format. */
FORMAT disaster_date date9. ;

/* Character variable shown with a character format of width 12. */
format ProjectNumber $12.;

November 19, 2012

RETAIN statement to collapse rows

/* Example input: several partially filled rows per id. */
data sample;
   input id $ var1 var2 var3;
   datalines;
A 20 . .
A . 30 40
B 20 . .
B 20 . .
C . 10 .
C 10 . .
C . . 10
;

/* Collapse to one row per id, keeping the latest non-missing value   */
/* seen for each of var1-var3. The input must be ordered by id.       */
data sample;
   set sample;
   by id;

   array incoming(3) var1-var3;   /* values on the current row        */
   array held(3) v1-v3;           /* values carried across the group  */
   retain v1-v3;

   /* Only the carried values leave the step, under the original names. */
   drop k var1-var3;
   rename v1=var1 v2=var2 v3=var3;

   /* Start every id with an empty set of carried values. */
   if first.id then call missing(of held(*));

   do k = 1 to 3;
      if incoming(k) ne . then held(k) = incoming(k);
   end;

   /* Write a single row once the whole id group has been read. */
   if last.id then output;
run;

proc print;
run;

Thanks GP.

November 19, 2012

Logistic regression and comparison of group means in the model (using PROC GLIMMIX)

For logistic regression models in PROC GLIMMIX, you need to use the dist=binary and link=logit options on the MODEL statement (sorry I missed pointing this out in your program in my previous response). So please add these two options.

For logistic regression models, the estimates are all on the logit scale, and so are the results of the LSMEANS statement. To get the lsmeans on the original scale (the probability scale), you can use the ILINK option --

/* ILINK also reports the LS-means on the probability scale; &group must already be defined. */
lsmeans &group / ilink ;

For differences between the groups, use the DIFF option on the LSMEANS statement. The results are also on the logit scale. If you use the OR option, you will get the odds ratios for the group effect --

/* DIFF gives the group differences (logit scale); OR adds the odds ratios. &group must already be defined. */
lsmeans &group /diff or;

Unfortunately, the differences between the groups on the probability scale are not directly available in PROC GLIMMIX. The magnitude of a difference is easy to compute: use the output from the ILINK option, which gives the estimated probability in each group, and compute the difference by hand or in a DATA step. However, the appropriate standard errors for these differences are not available in PROC GLIMMIX.

Thanks, JT.

November 19, 2012

Replace all letters and numeric values with xxxx

This replaces every letter with x.

/* Mask every letter in q with "x". */
data a (drop=pattern_id);
   q = " Licensed to ICF CONSULTING GROUP, Site 70007867. ";

   /* i = ignore case, o = compile the pattern only once. */
   pattern_id = prxparse("s/[A-Z]/x/io");

   /* -1 = replace every match; q is modified in place. */
   call prxchange(pattern_id, -1, q);
run;

proc print;
run;

This replaces every letter and every digit with x.

/* Mask every letter and every digit in q with "x". */
data a;
q=" Licensed to ICF CONSULTING GROUP, Site 70007867. ";
/* Character class holds letters and digits only. A comma inside the   */
/* brackets is a literal comma and would be masked as well.            */
/* i = ignore case, o = compile the pattern only once.                 */
prx=prxparse("s/[A-Z0-9]/x/io");
/* -1 = replace every match; q is modified in place. */
call prxchange(prx,-1,q);
drop prx;
run;

proc print;run;

Thanks, KR.

November 18, 2012

Running Winsteps from within SAS

/* Read the control file text below, one record per observation, into  */
/* the character variable LINE (columns 1-75).                         */
data rasch;
input line $ 1-75;
cards;
&INST
TITLE='KNOX CUBE TEST'
NI=18
ITEM1=11
NAME1=1
PERSON=KID
ITEM=TAP
PFILE=EXAMPLE1.PF
IFILE=EXAMPLE1.IF
&END
1-4
2-3
1-2-4
1-3-4
2-1-4
3-4-1
1-4-3-2
1-4-2-3
1-3-2-4
2-4-3-1
1-3-1-2-4
1-3-2-4-3
1-4-3-2-4
1-4-2-3-4-1
1-3-2-4-1-3
1-4-2-3-1-4
1-4-3-1-2-4
4-1-3-4-2-1-4
END NAMES
;

/* Write every stored record to an external text file. */
data rasch;set rasch;
file "C:\TEMP\winstep.txt";
put (line) (100.0);
run;
/* Run the external program through the operating-system shell.        */
/* NOTE(review): the executable is BIGSTEPS.EXE although the heading   */
/* says Winsteps; the two arguments are presumably the control file    */
/* and the report file - confirm against the program's documentation.  */
option xwait xsync;
x "start C:\BIGSTEPS\BIGSTEPS.EXE C:\TEMP\winstep.txt C:\TEMP\output.txt";

November 18, 2012 (updated December 12, 2018)

PROC TTEST for T-test

/* Example data: one post-test score per subject, with the group in Treatment. */
data exercise;
   input Subject_ID $ Posttest Treatment $;
   datalines;
A 24 T
B 26 C
C 25 T
D 44 C
E 45 T
F 24 C
G 25 T
;
run;

/* Compare mean Posttest between the two Treatment groups. */
proc ttest data=exercise;
   class Treatment;
   var Posttest;
run;

November 18, 2012

PROC TTEST and creating a result dataset

/*Ttest Macro*/

/*Creates a result SAS data set t_test_results*/
/*Find it in a temp folder and click-open it as an excel file*/

/*Settings: input data, analysis variables, two-level grouping variable*/
%let dataname=sashelp.class;
%let varlist=weight height age;
%let group=sex;

/*Run the t-tests and capture the three result tables through ODS*/
proc ttest data=&dataname;
class &group;
var
&varlist
;
ods output statistics=kaz1 ttests=kaz2 equality=kaz3;
run;

/*Keep the variables whose equality-of-variances test is significant.*/
/*A missing p-value compares as smaller than any number in SAS, so it*/
/*is excluded explicitly instead of being counted as significant.    */
data kaz3b;
set kaz3;
if not missing(ProbF) and ProbF < 0.05 then unequal=1;
if unequal=1;
keep Variable unequal;
run;
proc sort data=kaz3b;by Variable ;run;
proc sort data=kaz2;by Variable ;run;

/*For each variable keep the t-test row that matches the variance   */
/*decision: "Equal" unless the equality test was rejected.          */
data both;
merge kaz2 kaz3b;
by Variable ;
if unequal ne 1 then unequal=0;
flag=0;
if unequal=0 and variances="Equal" then flag=1;
if unequal=1 and variances="Unequal" then flag=1;
if flag=1;

/*Star marks p < 0.05; a missing p-value gets no star*/
SIG=" ";
if not missing(Probt) and Probt < 0.05 then SIG="*";

keep Variable Probt SIG variances;
run;

/*jun records the original row order of the statistics table.       */
/*NOTE(review): it is not used afterwards and is not kept in the    */
/*final data set.                                                   */
data kaz1b;
set kaz1;
jun=_n_;
run;
proc sort data=kaz1b;by Variable ;run;

/*Attach the test result to the descriptive statistics; the p-value,*/
/*star and variance type are filled in only on the "Diff (1-2)" row.*/
data t_test_results;
merge kaz1b both;
by Variable ;
this=0;
if class = "Diff (1-2)" then this=1;
if this =1 then do;
probt2=probt;
SIG2=SIG;
variances2=variances;
end;

keep Variable Class mean N probt2 sig2 variances2;

run;

November 18, 2012

Example: how to use ODS in PROC GLIMMIX or other procs

/*Use proc GLIMMIX to run an OLS regression
and save the results (parameter estimates) in a
data set named "john" using ODS*/
proc glimmix data=sashelp.class;
model height=weight /dist=normal link=identity solution;
ods output ParameterEstimates=john;
run;
/*Edit the result data*/
data john2;set john;

/*Create a new variable that indicates
the level of significance*/

/*Do not forget to specify a value length*/
length asterisk $ 3;

/*Later assignments overwrite earlier ones, so the strictest
threshold that is met wins. A missing p-value compares as smaller
than any number in SAS, so it is skipped and left blank.*/
if not missing(Probt) then do;
if Probt < 0.10 then asterisk="~";
if Probt < 0.05 then asterisk="*";
if Probt < 0.01 then asterisk="**";
if Probt < 0.001 then asterisk="***";
end;

run;

/*NOW FIND john2 in a work directory and right-click it
to open with Excel*/

/*You can also see this by PROC PRINT*/
proc print data=john2;
run;

November 18, 2012 (updated November 18, 2012)

ROC Curve Analysis using PROC LOGISTIC

/*ROC Curve Analysis Macro*/

/*a hypothetical data set: EVENT is 1 when Weight exceeds 100,
and height is used as the single predictor*/
data asdf;set sashelp.class;
EVENT=0;
if Weight > 100 then EVENT=1;
PREDICTOR=height;
run;

/*Settings: data name, outcome, predictor, folder for the HTML output*/
%let dataname=asdf;
%let outcome=EVENT;
%let ind=PREDICTOR;
%let save_graphic=C:\Documents and Settings\19702\My Documents\sas;

/*Fit the logistic model. OUTROC= saves the ROC points, OUTEST= saves
the coefficient estimates, and ODS OUTPUT saves the parameter,
association and convergence tables.*/
ods html PATH="&save_graphic" (url=none) file="&dataname &ind .html";
ods graphics on / imagename="&dataname&ind";
proc logistic data=&dataname descending OUTEST=&dataname.result;
title "&dataname";
model &outcome =
&ind
/ outroc=&dataname.kaz2 ROCEPS=0 ;
output out = m2 p = prob xbeta = logit ;
ods output ParameterEstimates=kazcoeff
Association=kazassoc
ConvergenceStatus=kazconverg(keep= reason);
run;
ods graphics off;
ods html close;

/*Turn the two label/value column pairs of the association table into
one wide row each, then put them side by side*/
proc transpose data=kazassoc out=T1;
var cValue1;
id label1;
run;
proc transpose data=kazassoc out=T2;
var cValue2;
id label2;
run;

data kazassoc2;
merge T1 T2;
run;

/*ods trace off;*/
/*Get descriptive statistics*/

ods listing close;
proc means data=&dataname;
var
&outcome
&ind
;
ods output summary=uekawa;
run;
ods listing;
/*get significance of the independent variable*/
data kazcoeff2;
set kazcoeff;
if Variable="&ind";
keep ProbChiSq StdErr flag;
flag=1;
label ProbChiSq="P-value for the ind var effect";
label StdErr="Stderr for the ind var effect";
run;

/*flag=1 on every row is the common key that lets the one-row
estimate tables be attached to every ROC point*/
data &dataname.kaz2;set &dataname.kaz2;
flag=1;
run;

data &dataname.result;
set &dataname.result;
flag=1;
run;

data &dataname.kaz3;merge &dataname.kaz2 &dataname.result kazcoeff2;
by flag;
run;

/*Distance from each ROC point to the top-left corner
(1 - specificity = 0, sensitivity = 1)*/
data &dataname.kaz4;set &dataname.kaz3;
Distance=sqrt( (0-_1MSPEC_)**2 + (1-_SENSIT_)**2 );
suji=_n_;
run;

/*Add the overall minimum distance to every row. PROC SQL runs each
statement immediately and is ended with QUIT; a RUN statement has
no effect there.*/
proc sql;
create table &dataname.kaz5 as
select *,
min(distance) as minimum_distance
from &dataname.kaz4;
quit;

/*Keep the ROC point(s) at the minimum distance and convert the
predicted probability at that point back to the predictor scale:
cut-off = (logit - intercept) / slope.
The RETAIN statement comes first so CUT_OFF_VALUE is the first
variable in the data set.*/
data optimal;
retain CUT_OFF_VALUE;
set &dataname.kaz5;
CUTOFF=0;
if distance = minimum_distance then do; CUTOFF=1; type="Dist to perfection";end;
/*if distance2 = maximum_distance2 then do; CUTOFF=1;
type="Dist to noninf";end;*/
if cutoff=1;
/*the column named after the predictor is read as the slope -
presumably supplied by the OUTEST= data set*/
effect=&ind ;
LOGIT=LOG(_PROB_ / (1-_PROB_));
CUT_OFF_VALUE=((LOGIT-Intercept)/effect);
drop cutoff ;
run;

/*Combine the cut-off, descriptive statistics, association measures
and convergence status row by row (no BY variable)*/
data results_of_ROC;
merge optimal uekawa kazassoc2 kazconverg;

TRUE_POSITIVE_RATE=_SENSIT_;
TRUE_NEGATIVE_RATE=1-_1MSPEC_;
/*C is presumably the "c" column from the transposed association table*/
AUC=C;
run;

proc print data=results_of_ROC;
title "ROC stats for &outcome";
var CUT_OFF_VALUE
TRUE_POSITIVE_RATE
TRUE_NEGATIVE_RATE
AUC ;
run;