Studying R on DataCamp

My memo

 

library(gapminder)
library(dplyr)
library(ggplot2)

# For the 2007 rows of gapminder, compute one row per continent holding the
# median GDP per capita and the median life expectancy.
by_continent_2007 <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarize(
    medianGdpPercap = median(gdpPercap),
    medianLifeExp = median(lifeExp)
  )

# Scatter plot of the per-continent medians: GDP per capita on x, life
# expectancy on y, one colored point per continent.
ggplot(
  data = by_continent_2007,
  mapping = aes(x = medianGdpPercap, y = medianLifeExp, color = continent)
) +
  geom_point()

R regression models

# Keep only the year == 1 rows of core2.
year1 <- core2 %>%
  filter(year == 1)

# Linear model of zpost_test on treat, pre_test and the listed covariates,
# fitted on the year-1 rows.
# NOTE(review): zpost_test is presumably the standardized post-test score --
# confirm against the data dictionary.
linearMod_y1 <- lm(
  formula = zpost_test ~ treat + pre_test + Male +
    Grade09 + Grade10 + Grade11 + Grade12 + Other +
    Black + hispanic + Par_BA + par_edu_miss,
  data = year1
)

# Show the call and the estimated coefficients.
print(linearMod_y1)

Memo for R from DataCamp

# Mean post-test and pre-test score for each year in core2.
# To restrict to a single year, insert `filter(year == 4) %>%` before
# group_by().
# NOTE(review): mean() returns NA when a score column contains NA; confirm
# whether na.rm = TRUE is wanted here.
core2 %>%
  group_by(year) %>%
  summarize(
    mean_score = mean(post_test),
    mean_score2 = mean(pre_test)
  )

 

 

# Mean post-test and pre-test score for the year == 4 rows of core2 only
# (a single summary row).
summarize(
  filter(core2, year == 4),
  mean_score = mean(post_test),
  mean_score2 = mean(pre_test)
)

 

# Population (log10 x-axis) against life expectancy for the rows of
# gapminder_1952, with points colored by continent.
# NOTE: gapminder_1952 must exist before this runs; in these notes it is
# created further down (gapminder filtered to year == 1952).
ggplot(
  data = gapminder_1952,
  mapping = aes(x = pop, y = lifeExp, color = continent)
) +
  geom_point() +
  scale_x_log10()

 

library(gapminder)
library(dplyr)
library(ggplot2)

# Subset of gapminder containing only the 1952 observations.
gapminder_1952 <- filter(gapminder, year == 1952)

# Population against GDP per capita for 1952; both axes use a log10 scale.
ggplot(
  data = gapminder_1952,
  mapping = aes(x = pop, y = gdpPercap)
) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

 

 

#this is a popular data editing package
library(tidyverse)
#this includes datasets
library(gapminder)
#this one includes Summarize() (capital S -- not the same as dplyr's summarize())
library(FSA)

# The gapminder data frame is provided by the gapminder package.
# Summarize() takes a formula of the form `value ~ grouping`.

# Summary of lifeExp within each continent
result_mean1 <- Summarize(lifeExp ~ continent, data = gapminder)

# Summary of lifeExp within each continent and year
result_mean2 <- Summarize(lifeExp ~ continent + year, data = gapminder)

# The summaries are stored in result_mean1 and result_mean2

 

 

# Return gapminder with lifeExp overwritten by lifeExp * 12.
# The result is only printed; gapminder itself is not modified.
mutate(gapminder, lifeExp = lifeExp * 12)

 

 

library(gapminder)
library(dplyr)

# 2007 rows of gapminder with an added lifeExpMonths column (12 * lifeExp),
# sorted from the largest to the smallest lifeExpMonths.
gapminder %>%
  filter(year == 2007) %>%
  mutate(lifeExpMonths = 12 * lifeExp) %>%
  arrange(desc(lifeExpMonths))

# Scatter plot of the rows of abc1: GrowthMindset on x, SelfEfficacy on y.
ggplot(
  data = abc1,
  mapping = aes(x = GrowthMindset, y = SelfEfficacy)
) +
  geom_point()

 

# Create scale scores.
#
# Each scale is the row-wise mean of its questionnaire items. Previously the
# scales used rowMeans(..., na.rm = TRUE) directly, which produced a score
# from whatever items happened to be answered (and NaN when none were).
# scale_mean() fixes that: a row whose number of missing items exceeds
# `max_missing` gets NA for that scale.
#
# items       numeric matrix, one row per respondent, one column per item
# max_missing largest number of missing items still allowed for a score;
#             the default 0 requires every item to be answered
# returns     numeric vector of scale scores, one per row of `items`
scale_mean <- function(items, max_missing = 0) {
  score <- rowMeans(items, na.rm = TRUE)
  # Blank out rows with too many unanswered items (this also removes the NaN
  # that rowMeans() returns for rows where every item is missing).
  score[rowSums(is.na(items)) > max_missing] <- NA_real_
  score
}

# Keep the identifier/treatment columns and replace the raw items with the
# five scale scores.
wholedata <- transmute(
  wholedata,
  flag, dataID, commonID, treat,
  GrowthMindset = scale_mean(cbind(
    q0008_0001, q0008_0002, q0008_0003, q0008_0004,
    q0008_0005, q0008_0006, q0008_0007, q0008_0008
  )),
  SelfEfficacy = scale_mean(cbind(
    q0009_0001, q0009_0002, q0009_0003, q0009_0004, q0009_0005
  )),
  MSelfEfficacy = scale_mean(cbind(
    q0010_0001, q0010_0002, q0010_0003, q0010_0004,
    q0010_0005, q0010_0006, q0010_0007
  )),
  MathAnxiety = scale_mean(cbind(
    q0011_0001, q0011_0002, q0011_0003,
    q0011_0004, q0011_0005, q0011_0006
  )),
  TeacherUse = scale_mean(cbind(
    q0012_0001, q0012_0002, q0012_0003, q0012_0004,
    q0013_0001, q0013_0002, q0013_0003, q0013_0004
  ))
)

Using R to do regular data things

# SINGLE AGGREGATE
# Variant without NA handling (kept for reference):
#sapply(abc[c("GrowthMindset", "SelfEfficacy", "MSelfEfficacy","MathAnxiety","TeacherUse")], mean)

# Mean of each scale column of abc, ignoring missing values.
# Returns a named numeric vector with one element per scale.
vapply(
  abc[c("GrowthMindset", "SelfEfficacy", "MSelfEfficacy", "MathAnxiety", "TeacherUse")],
  function(scale_values) mean(scale_values, na.rm = TRUE),
  numeric(1)
)

# Mean of GrowthMindset, SelfEfficacy and MSelfEfficacy within each treat
# group, returned as a data frame (one row per group) in result1.
#
# na.action = na.pass is required for na.rm = TRUE to have any effect: the
# formula method of aggregate() defaults to na.action = na.omit, which drops
# every row that has an NA in ANY of the three scales before FUN is called,
# so each mean would silently be computed on complete cases only.
result1 <- aggregate(
  cbind(GrowthMindset, SelfEfficacy, MSelfEfficacy) ~ treat,
  data = abc,
  FUN = function(x) mean(x, na.rm = TRUE),
  na.action = na.pass
)

# Full descriptive statistics for every scale within each treat group:
# sum, mean, min, first quartile, median, third quartile, max and sd.
# NOTE(review): with aggregate()'s default na.action (na.omit), rows missing
# any of the five scales are left out of all statistics -- confirm that this
# complete-case behavior is intended.
aggregate(
  cbind(GrowthMindset, SelfEfficacy, MSelfEfficacy, MathAnxiety, TeacherUse) ~ treat,
  abc,
  function(v) {
    # quantile() with default probs returns 0%, 25%, 50%, 75%, 100%;
    # elements 2 and 4 are the first and third quartiles.
    qs <- quantile(v)
    c(
      sum = sum(v), mean = mean(v), min = min(v), q1 = qs[2],
      median = median(v), q3 = qs[4], max = max(v), sd = sd(v)
    )
  }
)