# All examples in this document use R's built-in datasets.
# Start from the integers 0 through 9 and overwrite the sixth element with 15.
x <- seq(0, 9)
x[6] <- 15
# The same vector can also be built in one line.
x <- c(0:4, 15, 6:9)
# Print the vector built above.
x
##  [1]  0  1  2  3  4 15  6  7  8  9

# Multiply each integer from 45 to 57 by 0.42 (arithmetic is vectorized).
45:57 * 0.42
##  [1] 18.90 19.32 19.74 20.16 20.58 21.00 21.42 21.84 22.26 22.68 23.10
## [12] 23.52 23.94

# Multiply the vector 1:10 by a 10 x 4 matrix of Beta(2, 1) draws. Only
# 10 * 4 = 40 values are needed to fill the matrix, so draw exactly that many
# instead of 400: the surplus was discarded, and recent versions of R warn when
# the data length differs from the matrix size. The draws are random, so the
# printed values change from run to run.
1:10 %*% matrix(rbeta(40, 2, 1), nrow = 10, ncol = 4)
##       [,1]  [,2]  [,3] [,4]
## [1,] 33.02 34.13 39.78 34.4

# Subset the Seatbelts data to include only the drivers, rear, PetrolPrice,
# and law columns.
data.frame(Seatbelts[, c('drivers', 'rear', 'PetrolPrice', 'law')])

# Subset the CO2 data to include only observations where the plant's CO2
# uptake rate is less than or equal to 15.
CO2[which(CO2$uptake <= 15), ]

# Sort the mtcars data in ascending order by cylinders and miles per gallon.
mtcars[order(mtcars$cyl, mtcars$mpg), ]

# Plot a kernel density estimate of 10,000 draws from a normal distribution
# with mean 2 and standard deviation 0.89.
plot(density(rnorm(1e4, 2, 0.89)))

# Use the invlogit() function from arm without loading the package.
arm::invlogit(0.034)
## [1] 0.5085

# Using the mtcars data, fit a linear model that explains variation in miles
# per gallon as a function of number of cylinders, displacement, and
# horsepower. Extract the coefficients, standard error, and R^2 from the model.
m1 <- lm(mpg ~ cyl + disp + hp, data = mtcars)
# Coefficient estimates of the linear model m1.
coef(m1)
## (Intercept)         cyl        disp          hp
##    34.18492    -1.22742    -0.01884    -0.01468

# Standard errors: square roots of the diagonal of the coefficient
# variance-covariance matrix.
sqrt(diag(vcov(m1)))
## (Intercept)         cyl        disp          hp
##     2.59078     0.79728     0.01040     0.01465

# R^2 of the model.
summary(m1)$r.squared
## [1] 0.7679

# Use the Titanic data to fit a model that explains whether a passenger
# survived the ship's sinking as a function of their sex, age, and passenger
# class, but use a probit link function. What is the difference in coefficient
# estimates between this model and one using the canonical logit link function?
#
# Titanic is a four-way contingency table, so as.data.frame() yields one row
# per cell with the passenger count in `Freq`. The counts must be supplied as
# weights: without them every cell counts once, each covariate pattern has one
# "No" and one "Yes" row, and all coefficients are ~0 under either link (which
# is why the differences recorded previously were all on the order of 1e-16).
titanic_df <- as.data.frame(Titanic)
probit_fit <- glm(Survived ~ Class + Sex + Age, data = titanic_df,
                  weights = Freq, family = binomial(link = 'probit'))
logit_fit <- glm(Survived ~ Class + Sex + Age, data = titanic_df,
                 weights = Freq, family = binomial(link = 'logit'))
coef(probit_fit) - coef(logit_fit)

# Start from an empty numeric vector for the simulation that follows.
x <- numeric()
# Simulate the sampling distribution of the mean: draw 1,000 values from a
# normal distribution with mean -2.5 and standard deviation 4, record the
# sample mean, and repeat 10,000 times. vapply() returns a numeric vector of
# known length, which avoids growing x one element at a time inside a loop.
x <- vapply(
  seq_len(1e4),
  function(i) mean(rnorm(1e3, -2.5, 4)),
  numeric(1)
)
# Average of the simulated sample means.
mean(x)
## [1] -2.501

#' Arithmetic mean of a vector
#'
#' Divides the sum of the elements by the number of elements. Missing values
#' are not removed, so any `NA` in `x` makes the result `NA`; a zero-length
#' numeric vector gives `NaN` (0 / 0).
#'
#' @param x A numeric vector.
#' @return A single number: `sum(x) / length(x)`.
my.mean <- function(x) {
  sum(x) / length(x)
}
my.mean(1:7)
## [1] 4

#' Arithmetic mean of a vector, ignoring missing values
#'
#' Drops missing values with `na.omit()` and then divides the sum of the
#' remaining elements by how many remain.
#'
#' @param x A numeric vector, possibly containing `NA`.
#' @return A single number: the mean of the non-missing elements of `x`.
my.mean.NA <- function(x) {
  x <- na.omit(x)
  sum(x) / length(x)
}
my.mean.NA(c(NA, 1:7, NA))
## [1] 4

#' Square the even elements of a vector and take the square root of the rest
#'
#' An element counts as even when `x[i] %% 2 == 0`; every other element,
#' including odd integers and non-integer values such as 1.5, is replaced by
#' its square root.
#'
#' @param x A numeric vector without missing values (the `if` condition
#'   cannot be evaluated on `NA`).
#' @return A vector the same length as `x` with each element transformed.
myfunc <- function(x) {
  # seq_along() yields an empty sequence for zero-length input, whereas
  # 1:length(x) would iterate over c(1, 0) and fail on the missing x[1].
  for (i in seq_along(x)) {
    if (x[i] %% 2 == 0) {
      x[i] <- x[i]^2
    } else {
      x[i] <- sqrt(x[i])
    }
  }

  x
}
myfunc(seq(1, 6, by = .5))
##  [1]  1.000  1.225  4.000  1.581  1.732  1.871 16.000  2.121  2.236  2.345
## [11] 36.000

# Use the airquality data to plot wind speed against temperature. Use separate
# colors for observations in each month, and include a linear fit line for
# each month.
library(ggplot2)
# Scatter plot of temperature against wind speed, colored by month, with one
# linear fit line per month (the color aesthetic also groups the fits).
# Legend labels are derived from the months present in the data so that they
# cannot drift out of step with the factor levels; for airquality this gives
# May, Jun, Jul, Aug, Sep.
ggplot(data = airquality, aes(x = Wind, y = Temp, color = as.factor(Month))) +
  geom_point() +
  # Spell out FALSE: the shorthand F is an ordinary variable and can be
  # reassigned.
  geom_smooth(method = 'lm', se = FALSE) +
  labs(color = 'Month') +
  scale_color_discrete(labels = month.abb[sort(unique(airquality$Month))]) +
  theme_bw() +
  # Strip the background, grid lines, and panel border for a minimal look.
  theme(legend.position = 'right',
        plot.background = element_blank(),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        panel.border = element_blank())