R code for deriving Hedge's g (or Cox Index)

#What works clearinghouse version of standardized program difference
#Hedges's g and Cox Index
#See page 15 and 16
#https://ies.ed.gov/ncee/wwc/Docs/referenceresources/WWC_Procedures_Handbook_V4_1_Draft.pdf
#Kaz has the web-based calculator -- to use for QC'ing my results here
#https://www.estat.us/file/calc_t_test1.php

 

CCC<-filter(match.data1,treat==0)
TTT<-filter(match.data1,treat==1)

#These are for raw database
#raw data
#CCC<-filter(studydata3,treatment==0)
#TTT<-filter(studydata3,treatment==1)
#CCC<-filter(psmdata,treat==0)
#TTT<-filter(psmdata,treat==1)

 

###############################################################################
#What works clearinghouse version of standardized program difference
#Hedges's g and Cox Index
#See page 15 and 16
#https://ies.ed.gov/ncee/wwc/Docs/referenceresources/WWC_Procedures_Handbook_V4_1_Draft.pdf
#Kaz has the web-based calculator -- to use for QC'ing my results here
#https://www.estat.us/file/calc_t_test1.php

#Hedges' g for continuous variables
#Kaz added the correct, sample size adjusted version of this while meeting with Samara for transition
kaz_macro_lin<-function(kaz1){
col_name <- deparse(substitute(kaz1))
C_mean<-mean(CCC[[col_name]])
T_mean<-mean(TTT[[col_name]])
C_sd<-sd(CCC[[col_name]])
T_sd<-sd(TTT[[col_name]])
C_n<-length(CCC[[col_name]])
T_n<-length(TTT[[col_name]])
total_n<-C_n+T_n
#linear
simple_gap=T_mean-C_mean
g1<- ((T_n-1)*(T_sd*T_sd))+((C_n-1)*(C_sd*C_sd))
g2= T_n + C_n -2
g3= sqrt(g1/g2)
wwc_effect= simple_gap/g3

#I didn't adjust for sample size
omega<-(1-3/( 4*total_n -9))
wwc_effect_n_adjusted= (omega*simple_gap)/g3
print(T_n)
print(C_n)
print(total_n)
print(T_mean)
print(C_mean)
print(T_sd)
print(C_sd)
print ("Hedges'g without sample size adjustment")
print(wwc_effect)
print ("Hedges'g with sample size adjustment (Use this)")
print(wwc_effect_n_adjusted)
print ("FYI: Adjustment factor")
print(omega)
}

#Cox Index
#Kaz added the correct, sample size adjusted version of this while meeting with Samara for transition
kaz_macro_bin<-function(kaz1){
col_name <- deparse(substitute(kaz1))
C_mean<-mean(CCC[[col_name]])
T_mean<-mean(TTT[[col_name]])
C_sd<-sd(CCC[[col_name]])
T_sd<-sd(TTT[[col_name]])
C_n<-length(CCC[[col_name]])
T_n<-length(TTT[[col_name]])

#binary
Odds_C<-(C_mean/(1-C_mean))
Odds_T<-(T_mean/(1-T_mean))

Odds_ratio<-Odds_T/Odds_C

LN_C<-log(Odds_C)
LN_T<-log(Odds_T)
LN_DIF<-LN_T-LN_C

# WWC_effect=(round(LN_DIF/1.65,0.001))
WWC_effect_binary<-(LN_DIF/1.65)

#sample size adjustment (Kaz is adding this on December 30 2022)
#I didn't use this for writing the report draft
total_n<-C_n+T_n
omega<-(1-3/( 4*total_n -9))
WWC_binary_effect_n_adjusted=(omega*LN_DIF)/1.65;
print(T_n)
print(C_n)
print(total_n)
print(T_mean)
print(C_mean)
print("Cox Index without sample size adjustment")
print(WWC_effect_binary)
print("Cox Index with sample size adjustment -- Use this")
print(WWC_binary_effect_n_adjusted)
print ("FYI: Adjustment factor")
print(omega)
}

###############################################################################
table(psmdata$treat)

kaz_macro_bin(male)
kaz_macro_bin(minority)
kaz_macro_bin(disadv)
kaz_macro_bin(binary_dualcredit)

kaz_macro_lin(GPA_12_GRADE)
kaz_macro_lin(SAT_TOTAL)

#Sam asked me to check this
kaz_macro_lin(TOTAL_DUALCREDIT)

kaz_macro_bin(enroll_FR_spring)
kaz_macro_bin(enroll_SP_fall)
kaz_macro_bin(enroll_SP_spring)

PHP syntax for calculating WWC standardized group difference (Hedges' g and Cox Index)

<?php
function compute()
{
    $Tmean = $_POST['Tmean'];
    $Cmean = $_POST['Cmean'];
    $TSD = $_POST['TSD'];
    $CSD = $_POST['CSD'];
    $TN = $_POST['TN'];
    $CN = $_POST['CN'];
$mean_dif=$Tmean-$Cmean;
$SE=sqrt(
(($TSD*$TSD) / $TN)+(($CSD*$CSD) / $CN)
);
$T=$mean_dif/$SE;
$DF=$TN+$CN-2;
$P="Under Development (Still working on this)";
/*$P=stats_dens_normal($T, 0,1);*/
/*$P=stats_dens_gamma(float $X, float $shape, float $scale);*/
/*$P= $T / 100 ;*/
/*Hedges g*/
/*g numerator*/
$g_numerator=($Tmean-$Cmean)*(1-3/((4*($TN+$CN))-9));
/*g demnominator*/
$g_denominator=SQRT(((($TN-1)* ($TSD**2) )+(($CN-1)* ($CSD**2) ))/($TN+$CN-2));
$hedges_d=$g_numerator/$g_denominator;
$hedges_d_abs=abs($hedges_d);
/*if binary variabels*/
$T_Odds=$Tmean/(1-$Tmean);
$C_Odds=$Cmean/(1-$Cmean);
$Odds_ratio=$T_Odds/$C_Odds;
$Tstep1=log($T_Odds);
$Cstep1=log($C_Odds);
$step2=$Tstep1-$Cstep1;
$WWC_binary_effect=$step2/1.65;
/*Use omega factor to adjust for data size*/
$total_num=$TN + $CN;
$omega=(1-3/( 4*$total_num -9));
$WWC_binary_effect_omega=($omega*$step2)/1.65;
/*
if ($hedges_d >= 0.2) echo "Small Effect (Cohen)";
if ($hedges_d >= 0.5) echo "Medium Effect (Cohen)";
if ($hedges_d >= 0.8) echo "Large Effect (Cohen)";
*/
echo "<br>";
echo "WWC group comparison of continuous variables";
echo "<br>";
echo "<br>";
echo "Values you entered:";
echo "<br>";
echo "<br>";
echo "Treatment N:" .$TN;
echo "<br>";
echo "Treatment mean:" .$Tmean;
echo "<br>";
echo "Treatment SD:" .$TSD;
echo "<br>";
echo "Comparison N:" .$CN;
echo "<br>";
echo "Comparison mean:" .$Cmean;
echo "<br>";
echo "Comparison SD:" .$CSD;
echo "<br>";
echo "The group mean difference:".round($mean_dif,2);
echo "<br>";
echo "without rounding:".round($mean_dif,5);
echo "<br><br>";
/*echo "Probability " .round($P,2);*/
echo "Probability: " .$P;
echo "<br>";
$abs_T=abs($T);
echo "T-score is: " .round($T,2);
echo "<br>";
echo "without rounding: " .round($T,5);
echo "<br>";
if($abs_T < 1.96 ) {
    echo "Not significant at alpha 0.05 (two tail test;I used a z-test and ignored degree of freedom; threshold 1.96)";
}elseif($abs_T >=1.96){
echo "Significant at alpha 0.05 (two tail test;I used a z-test and ignored degree freedom; threshold 1.96)";
}
echo "<br>";
echo "<br>";
echo "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";echo "<br>";
echo "If a variable is an interval scale:";
echo "<br>";
echo "Hedges g " .round($hedges_d,2);
echo "<br>";
echo "without rounding: " .round($hedges_d,5);
echo "<br>";
echo "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";echo "<br>";
echo "<br>";
echo "<br>";
echo "Hedges g description";
echo "<br>";
echo "Page 16 of https://ies.ed.gov/ncee/wwc/Docs/referenceresources/WWC_Procedures_Handbook_V4_1_Draft.pdf";
echo "<br>";
echo "<br>";
echo "For continuous outcomes, the WWC has adopted the most commonly used effect size";
echo "<br>";
echo "index, the standardized mean difference known as Hedges’ g, with an adjustment for small";
echo "<br>";
echo "sample bias. For group design studies, this effect size is defined as the difference between the";
echo "<br>";
echo "mean outcome for the intervention group and the mean outcome for the comparison group, ";
echo "<br>";
echo "divided by the pooled within-group standard deviation of the outcome measure. Defining yi and ";
echo "<br>";
echo "yc as the means of the outcome for students in the intervention and comparison groups, ni and nc";
echo "<br>";
echo "as the student sample sizes, si and sc as the student-level standard deviations, given by ....";
echo "<br>";
echo "In addition, we define as the small sample size correction the effect size (Hedges 1981), which is given by ";
echo "<br>";
echo "<br>";
echo "<br>";
/*cohen's rule of thumb*/
echo "Cohen's rule of thumb";echo "<br>";
echo "if d >= 0.2 small effect -- if d >= 0.5 medium effect if --- d >= 0.8 large effect";echo "<br>";
if($hedges_d_abs < 0.2) {
    echo "Close to zero and Not even small Effect (Cohen)";
}elseif($hedges_d_abs>=0.2 and $hedges_d_abs < 0.5){
echo "Small effect (Cohen)";
}elseif($hedges_d_abs>=0.5 and $hedges_d_abs < 0.8){
echo "Medium effect (Cohen)";
}elseif($hedges_d_abs>=0.8){
echo "Large effect (Cohen)";
}else {
    echo "others";
}
echo "<br>";
echo "<br>";
echo "If a variable is obviously a continuous variable (e.g., MAX is greater than 1), ignore the result below (It says NAN)";
echo "<br>";
echo "<br>";
echo "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";echo "<br>";
echo "If the variable is a binary variable (0 or 1), the Cox index value is " .round($WWC_binary_effect,2);
echo "<br>";
echo "with a lot of digits without rounding " .round($WWC_binary_effect,5);echo "<br>";
echo "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";echo "<br>";
echo "<br>";
echo "<br>";
echo "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";echo "<br>";
echo "Omega adjusted (data size taken into consideration) " .round($WWC_binary_effect_omega,2);
echo "<br>";
echo "with a lot of digits without rounding " .round($WWC_binary_effect_omega,5);
echo "<br>";
echo "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";echo "<br>";
echo "<br>";
echo "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";echo "<br>";
echo "Odds ratio:" .round($Odds_ratio,2);
echo "<br>";
echo "without rounding " .round($Odds_ratio,5);
echo "<br>";
echo "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";echo "<br>";
echo "<br>";
echo "Cox Index description";
echo "<br>";
echo "Page 16 of https://ies.ed.gov/ncee/wwc/Docs/referenceresources/WWC_Procedures_Handbook_V4_1_Draft.pdf";
echo "<br>";
echo "<br>";
echo "For dichotomous outcomes, the difference in group means is calculated as the difference in";
echo "<br>";
echo "the probability of the occurrence of an event. The effect size measure of choice for dichotomous ";
echo "<br>";
echo "outcomes is the Cox index, which yields effect size values similar to the values of Hedges’ g that";
echo "<br>";
echo "one would obtain if group means, standard deviations, and sample sizes were available, assuming";
echo "<br>";
echo "the dichotomous outcome measure is based on an underlying logistic similar to a normal";
echo "<br>";
echo "distribution. Defining pi and pc as the probability of an outcome for students in the intervention";
echo "<br>";
echo "and comparison groups, the effect size is given by";
echo "<br>";
echo "";
echo "<br>";
}
/*echo "The result is: " . compute();*/
compute();
?>
<br>
REFERENCE
<br>
Cohen's rule of thumb about effect sizes:
<br>
<li>If greater than 02, Small Effect
<br>
<li>If greater than 0.5, Medium Effect
<br>
<li>If greater 0.8 then Large Effect
<br>
Cohen, J. Statistical power for the behavioral sciences (2nd ed.). Hillsdale, NJ: Erlbaum (1988).
<br>
<ahref="https://wmich.edu/sites/default/files/attachments/u58/2015/Effect_Size_Substantive_Interpretation_Guidelines.pdf">
Effect Size Substantive Interpretation Guidelines: Issues in the Interpretation of Effect Sizes Jeff Valentine and Harris Cooper, Duke University(see page. 5)</a>
<br>
<br>
WWC related info:
<br>
<a href="https://ies.ed.gov/ncee/wwc/Docs/ReferenceResources/wwc_procedures_handbook_v4_draft.pdf">WWC procedures handbook (see page. 14)</a>
<br>
<a href="https://ies.ed.gov/ncee/wwc/Docs/OnlineTraining/wwc_training_m3.pdf">WWC standards slides (Definition of small sample size correction, slide 14)</a>
<br>
WWC considers the effect size greater than .25 substnatively important.
<a href="https://ies.ed.gov/ncee/wwc/Docs/referenceresources/wwc_procedures_handbook_v4.pdf">P.22 of WWC standards</a>
<br>
<br>
<a href="calc_t_test1.php">Back to the calculcator </a>
<br>
<a href="https://www.estat.us">My website</a>
<br>
Look at calc process t..1
<br>

Odds ratio in SAS PROC LOGISTIC

Odds ratio in SAS PROC LOGISTICS are just

exp(coefficient)

This is obvious for binary variables.

Even for continuous variables, it's just

exp(coefficient)

..which means that I think it is mot counterintuitive to standardize continuous variables.

So one easy way to get odds ratios is to run PROC LOGISTICS with continuous variables standardized and use exp(X) in Excel.

SAS Proc logistic and WWC effect size

ods trace on;
proc logistic data=final DESCENDING;
model YA01_3_bin =
treat
male
age_log
CH_compass
/*R_compass*/
white
black
/*other_race*/;
ods output ParameterEstimates=kaz1;
run;

data kaz1b;
set kaz1;
if Variable="Intercept" or Variable="treat";
keep Variable Estimate;
run;
proc transpose data=kaz1b out=kaz1bt;
id Variable;
run;

data kaz1bt2;
set kaz1bt;
C_LOGIT=intercept;
T_LOGIT=intercept+treat;
C_EXP=exp(C_LOGIT)/(1+exp(C_LOGIT));
C_ODDS=C_EXP/(1-C_EXP);
C_STEP1=log(C_ODDS);

T_EXP=exp(T_LOGIT)/(1+exp(T_LOGIT));
T_ODDS=T_EXP/(1-T_EXP);
T_STEP1=log(T_ODDS);

STEP2=T_STEP1-C_STEP1;
wwc_effect_size=STEP2/1.65;
odds_ratio=T_ODDS/C_ODDS;
run;

WWC effect size omega factor (to adjust for n) and R sample

https://ies.ed.gov/ncee/wwc/Docs/referenceresources/WWC_Procedures_Handbook_V4_1_Draft.pdf

See Page 16.

two_values<-view1b[3:4]
num<-view1b[2]

omega<-(1-3/(4*num-9))

C_LOGIT<-two_values[1]
C_EXP<-exp(C_LOGIT)/(1+exp(C_LOGIT))
C_ODDS<-C_EXP/(1-C_EXP)
C_STEP1<-log(C_ODDS)
T_LOGIT<-two_values[1]+two_values[2]
T_EXP<-exp(T_LOGIT)/(1+exp(T_LOGIT))
T_ODDS<-T_EXP/(1-T_EXP)
T_STEP1<-log(T_ODDS)
STEP2<-T_STEP1-C_STEP1
wwc_effect_size<-STEP2/1.65

wwc_effect_size_omega<-(omega*STEP2)/1.65

odds_ratio<-T_ODDS/C_ODDS

 

How to create a bar graph off logistic regression results

This is how you can create a graph comparing the treatment group and the comparison group's %s  -- based on findings from the logistic regression model.

Based on the final multivariate model, you get the program effect in logit. For example:

-0.223

You also run the simpler model with the treatment program indicator only. Get the intercept value. For example:

1.7081

When this logic is converted into a %, it will be the unadjusted % of the comparison group.

Use these two numbers to derive %s for the treatment group and comparison group.

https://docs.google.com/spreadsheets/d/0B7AoA5fyqX_sN0RUc0E5aFowb00/edit?usp=sharing&ouid=116965977650967783537&resourcekey=0-nGn3k4fyPrVKP2f4Xm-fOA&rtpof=true&sd=true

The resulting graph fixes the % of the comparison group to the simple % of the comparison group and shows the % of the treatment group based on the program effect adjusted for covariates.

If you use the intercept value and the program effect value from the final multivariate model, the meaning of percentages become non-intuitive. If you center predictors (except for the treatment indicator), the intercept will represent a typical person in the dataset. But this is a bit difficult to undestand.

Without centering, the intercept value will represent someone who is, for example, female, white, etc., depending on omitted categories of the variables. If continuous variables are included and if the value of 0 in that variable is not intuitive, the meaning of the intercept is a difficult to interpret.

I recommend using the intercept from the simple model (only the treatment group is the predictor), so the comparison group % will be fixed at a simple descriptive % of the comparison group.

 

SAS data variable function detepart

if datepart(OffenseDate) > s_date;

 

 

data crim;
set abc.crim_history;
format new_date MMDDYY10.;
format s_date MMDDYY10.;
new_date=datepart(OffenseDate);
s_date=mdy(1,1,2019);
instruction="Drop ";
if new_date > s_date then instruction="Keep";
*if datepart(OffenseDate) > s_date;
run;

#clear console
rm(list = ls())

setwd("C:/sas/(01) D/newdata")

crim<- read_excel("HistoryCombined.xlsx",sheet="History ")

#Creating the new dataset after WV review -- I added dual credit info per grade level
write.foreign(crim, "C:/sas/(01) D/newdata/data.txt",
"C:/sas/(01) D/newdata/data.sas", package="SAS")

 

Packages I use are:

library(broom)
library(compute.es)
library(dplyr)
library(forcats)
library(FSA)
library(gapminder)
library(ggplot2)
library(gmodels)
library(haven)
library(here)
library(leaflet)
library(magrittr)
library(markdown)
library(MatchIt)
library(plyr)
library(psych)
library(purrr)
library(readr)
library(readxl)
library(sas7bdat)
library(sf)
library(sqldf)
library(stringr)
library(summarytools)
library(tidyverse)
library(tmap)
library(tmaptools)
library(descr)
library(MatchIt)
library(sjmisc)
library(writexl)
library(skimr)
library(lme4)
library(lmerTest)
library(car)
library(foreign)