# Clear the workspace (note: rm(list = ls()) removes all objects from the
# global environment; it does not clear the console).
rm(list = ls())

# NOTE(review): hard-coded setwd() makes this machine-specific; consider
# the here package (already in the author's package list).
setwd("C:/sas/(01) D/newdata")

# read_excel() comes from readxl and write.foreign() from foreign -- load
# them before use (in the original they were only loaded further down).
library(readxl)
library(foreign)

# The trailing space in "History " matches the actual sheet name.
crim <- read_excel("HistoryCombined.xlsx", sheet = "History ")

# Creating the new dataset after WV review -- I added dual credit info per
# grade level. Writes the data as text plus a SAS program that can read it.
write.foreign(crim, "C:/sas/(01) D/newdata/data.txt",
              "C:/sas/(01) D/newdata/data.sas", package = "SAS")

 

Packages I use are:

library(broom)
library(compute.es)
library(dplyr)
library(forcats)
library(FSA)
library(gapminder)
library(ggplot2)
library(gmodels)
library(haven)
library(here)
library(leaflet)
library(magrittr)
library(markdown)
library(MatchIt)
library(plyr)
library(psych)
library(purrr)
library(readr)
library(readxl)
library(sas7bdat)
library(sf)
library(sqldf)
library(stringr)
library(summarytools)
library(tidyverse)
library(tmap)
library(tmaptools)
library(descr)
library(MatchIt)
library(sjmisc)
library(writexl)
library(skimr)
library(lme4)
library(lmerTest)
library(car)
library(foreign)

Logistic regression in R and creating WWC effect size

# Logistic regression: spring freshman enrollment regressed on treatment
# plus demographic/achievement covariates.
#https://stats.oarc.ucla.edu/r/dae/logit-regression/
# NOTE(review): `sample` here is a data frame defined elsewhere (it shadows
# base::sample) -- confirm it exists before running this chunk.
mylogit1 <- glm(enroll_FR_spring ~ treat+ male +minority +disadv +binary_dualcredit+SAT_TOTAL + GPA_12_GRADE +FALL_2020_INSTNAME, data = sample, family = "binomial")
summary(mylogit1)

# First two coefficients: the intercept (comparison-group log-odds with all
# covariates at zero) and the treatment effect on the log-odds scale.
coef_table<-(coef(mylogit1))
two_values<-coef_table[1:2]
# Comparison group: logit -> probability -> odds -> log(odds).
# Mathematically this round trip is an identity (C_STEP1 == C_LOGIT); it is
# kept to show each conversion step explicitly.
C_LOGIT<-two_values[1]
C_EXP<-exp(C_LOGIT)/(1+exp(C_LOGIT))
C_ODDS<-C_EXP/(1-C_EXP)
C_STEP1<-log(C_ODDS)
# Treatment group: same conversions, starting from intercept + treat coef.
T_LOGIT<-two_values[1]+two_values[2]
T_EXP<-exp(T_LOGIT)/(1+exp(T_LOGIT))
T_ODDS<-T_EXP/(1-T_ODDS*0+T_EXP*0-T_EXP+1-1+T_EXP)
T_STEP1<-log(T_ODDS)
# Difference in log-odds (equals the treat coefficient) ...
STEP2<-T_STEP1-C_STEP1
# ... converted to the WWC Cox index: d = (log odds ratio) / 1.65.
wwc_effect_size<-STEP2/1.65
odds_ratio<-T_ODDS/C_ODDS

R function example -- when a data frame is involved

# Variable labels keyed by column name; columns without an entry get NA.
var.labels = c(age="Age in Years", GPA_12_GRADE="Test Score", gpa="GPA", sex="Sex of the participant")
# Toy data frame (note the NA in age) used to demo the effect-size function.
DATA_KAZ <- data.frame(age = c(21, 30, 25, 41, 29, 33,NA),
GPA_12_GRADE = c(75, 65, 88, 92, 97, 79, 83),
gpa = c(3.4,2.7,3.9,4.0,2.5,1.2, 3.2),
sex = factor(c(1, 2, 1, 2, 1, 2, 1), labels = c("Female", "Male")),
race=factor(c(1,1,2,2,3,3, 1), labels = c("White", "Black","Hispanic")))
# NOTE(review): label() is not base R -- presumably Hmisc::label, which is
# not in the library list above; confirm the intended package is loaded.
label(DATA_KAZ) = as.list(var.labels[match(names(DATA_KAZ), names(var.labels))])

# "Comparison" and "treatment" groups for the demo: females vs. males.
CCC<-filter(DATA_KAZ,sex=="Female")
TTT<-filter(DATA_KAZ,sex=="Male")

 

# Compute the WWC-style standardized mean difference (Hedges g numerator /
# pooled SD, without the small-sample correction) for one numeric column,
# comparing the global treatment (TTT) and comparison (CCC) data frames.
#
# kaz1: a bare (unquoted) column name present in both CCC and TTT.
# Returns: (treatment mean - comparison mean) / pooled SD.
#
# NOTE(review): relies on data frames CCC and TTT existing in the calling
# environment -- confirm they are created before this is called.
kaz_macro <- function(kaz1) {
  col_name <- deparse(substitute(kaz1))
  c_vals <- CCC[[col_name]]
  t_vals <- TTT[[col_name]]
  # Drop missing values so one NA does not make the whole result NA, and
  # count only non-missing observations (length() would count NAs too).
  c_vals <- c_vals[!is.na(c_vals)]
  t_vals <- t_vals[!is.na(t_vals)]
  C_mean <- mean(c_vals)
  T_mean <- mean(t_vals)
  C_sd <- sd(c_vals)
  T_sd <- sd(t_vals)
  C_n <- length(c_vals)
  T_n <- length(t_vals)

  simple_gap <- T_mean - C_mean
  # Pooled SD: weighted sum of squared SDs over the combined df,
  # per the WWC procedures handbook.
  g1 <- ((T_n - 1) * (T_sd * T_sd)) + ((C_n - 1) * (C_sd * C_sd))
  g2 <- T_n + C_n - 2
  g3 <- sqrt(g1 / g2)
  wwc_effect <- simple_gap / g3
  wwc_effect
}
# Example calls: effect sizes (female vs. male) for the 12th-grade test
# score and GPA columns of the demo data.
kaz_macro(GPA_12_GRADE)
kaz_macro(gpa)

Baseline Check using SAS

/*Baseline descriptive statistics for the full (raw) file, split by
treatment status; STACKODSOUTPUT gives one row per variable in the ODS
summary table*/
proc means data=both4 stackodsoutput;
class treat;
var
male minority age
risk_miss
risk_08
risk_09
risk_10
felony_binary
misdem_Sum
felony_Sum
misdem_binary;
/*capture the summary statistics as a dataset for later processing*/
ods output summary=temp1a;
run;

/*Same baseline summary for the analysis sample (labeled "(2)sample"
later) -- presumably the matched sample; confirm against how outgs2 is
built*/
proc means data=outgs2 stackodsoutput;
class treat;
var
male minority age
risk_miss
risk_08
risk_09
risk_10
felony_binary
misdem_Sum
felony_Sum
misdem_binary;
ods output summary=temp1b;
run;

/*Tag each summary table so the stacked file shows which source each row
came from: raw data vs. sample*/
data temp1a;
set temp1a;
datatype="(1)raw data";
run;

data temp1b;
set temp1b;
datatype="(2)sample";
run;

/*Stack the two tagged summaries into one file*/
data temp1;
set temp1a temp1b;run;

/*Treatment rows only: prefix the statistics with T_ so they can sit side
by side with the comparison group after the merge*/
data T;set temp1;
if treat=1;
/*suji preserves the original row order for the final sort*/
suji=_n_;
T_N=N;
T_Mean=Mean;
T_SD=StdDev;
T_Min=Min;
T_Max=Max;
keep suji variable T_N T_mean T_SD T_min T_max datatype;
run;
/*no data= given: sorts the most recently created dataset (T)*/
proc sort;by variable;run;

/*Comparison rows only: C_ prefix; suji and datatype are carried on the
treatment file, so they are not kept here*/
data C;set temp1;
if treat=0;
suji=_n_;
C_N=N;
C_Mean=Mean;
C_SD=StdDev;
C_Min=Min;
C_Max=Max;
keep variable C_N C_mean C_SD C_min C_max;
run;
/*no data= given: sorts the most recently created dataset (C)*/
proc sort;by variable;run;

/*Merge treatment and comparison statistics side by side and compute the
WWC effect size for every baseline variable*/
data TC;
merge T C;
by variable;

/*create statistics*/
mean_dif=(T_Mean-C_Mean);
/*Standardized effects: pooled-SD denominator (Hedges g style), per the
WWC procedures handbook*/
g1=
((T_N-1)*(T_SD*T_SD))
+((C_N-1)*(C_SD*C_SD));

g2=T_N + C_N -2;
g3=sqrt(g1/g2);
WWC_effect=mean_dif/g3;

/*Binary outcomes are detected by min/max of 0/1 in both groups; for them
the Cox index (log odds ratio / 1.65) replaces the standardized mean
difference*/
outcome_type="interval";
if T_Min=0 and T_Max=1 and C_Min=0 and C_Max=1 then do;
outcome_type="binary";
/*&usethis._Mean_Yes-&usethis._Mean_NO*/
/*for a 0/1 variable the group mean is a proportion, so p/(1-p) = odds*/
Odds_C=(C_Mean/(1-C_Mean));
Odds_T=(T_Mean/(1-T_Mean));

Odds_ratio=Odds_T/Odds_C;

LN_C=LOG(Odds_C);
LN_T=LOG(Odds_T);
LN_DIF=LN_T-LN_C;

/*Cox index: d = ln(odds ratio) / 1.65*/
WWC_effect=/*abs*/(round(LN_DIF/1.65,0.001));/*fixed 06 01 2016*/

/*If greater than 0.2, Small Effect
If greater than 0.5, Medium Effect
If greater than 0.8 then Large Effect*/
end;

/*Qualitative Cohen label; note the strict > comparisons, so a |d| exactly
at a threshold keeps the smaller label, and |d| <= 0.2 gets no label*/
if WWC_effect ne . then do;
if abs(WWC_effect) > 0.2 then cohen="Small ";
if abs(WWC_effect) > 0.5 then cohen="Medium";
if abs(WWC_effect) > 0.8 then cohen="Large";
end;

/*drop intermediate calculation variables*/
drop
LN_DIF
LN_C
LN_T
Odds_ratio
Odds_T
Odds_C
g3
g2
g1
;

run;
/*restore the original variable order saved in suji*/
proc sort;by suji;run;

 

QC Comparison of WWC effect size code using R and SAS

I wrote this entry when I wanted to QC my WWC effect size calculation using R and SAS.

 

WWC standard doc

https://ies.ed.gov/ncee/wwc/Docs/referenceresources/wwc_procedures_v2_1_standards_handbook.pdf

Page 37.

 

In SAS:

/*Hand-built example to QC the WWC effect-size formula: with both SDs at
200 the pooled SD is 200, so WWC_effect should be 100/200 = 0.5*/
data test;

N_Yes=4300;
Mean_Yes=400;
StdDev_Yes=200;

N_No=4000;
Mean_NO=300;
StdDev_NO=200;

/*create statistics*/
mean_dif=(Mean_Yes-Mean_NO);
/*Standardized effects: pooled-SD (Hedges g style) denominator*/

g1=((N_Yes-1)*(StdDev_Yes*StdDev_Yes)) +((N_No-1)*(StdDev_No*StdDev_No));
g2=N_Yes + N_No -2;
g3=sqrt(g1/g2);
WWC_effect=mean_dif/g3;

run;

 

In R:

# QC replication in R of the SAS pooled-SD effect-size example, using the
# same inputs (treatment N = 4300, mean = 400, SD = 200; comparison
# N = 4000, mean = 300, SD = 200). Expected result: 0.5.
FGC_l_N_YES <- 4300
FGC_l_Mean_YES <- 400
FGC_l_StdDev_YES <- 200

REG_l_N_YES <- 4000
REG_l_Mean_YES <- 300
REG_l_StdDev_YES <- 200

# Raw mean difference between the two groups.
simple_gap <- FGC_l_Mean_YES - REG_l_Mean_YES

# Pooled-variance pieces: df-weighted sum of squared SDs and combined df.
g1 <- (FGC_l_N_YES - 1) * (FGC_l_StdDev_YES * FGC_l_StdDev_YES) +
  (REG_l_N_YES - 1) * (REG_l_StdDev_YES * REG_l_StdDev_YES)
g2 <- FGC_l_N_YES + REG_l_N_YES - 2
g3 <- sqrt(g1 / g2)

# Standardized mean difference (should match the SAS WWC_effect value).
simple_gap_std <- simple_gap / g3
simple_gap_std

# Echo the intermediate values for comparison against the SAS run.
g1
g2
g3
simple_gap_std

 

My web calculator

https://www.estat.us/file/calc_t_test1b.php

 

Treatment N:4300
Treatment mean:400
Treatment SD:200

Comparison N:4000
Comparison mean:300
Comparison SD:200
The group mean difference:100

[RESULTS FOR CONTINUOUS OUTCOME]

Probability: Under Development (Still working on this)
T-score is: 22.761
Significant at alpha 0.05 (two-tailed test; I used a z-test and ignored degrees of freedom; threshold 1.96)

T-test (the same test as above but with three thresholds): 1.96, 2.576, and 3.291, corresponding to p = 0.05, 0.01, and 0.001, respectively
Sig at p=.001***

Hedges d 0.5