Tidy Tuesday 2 Analyzing web page metrics in R

R
tidymodel
Author

Tony Duan

Published

January 13, 2024

1 Packages

Code
library(tidyverse)
library(ggplot2)
library(tidymodels)
library(rsample)
library(themis)
library(tidytuesdayR)

library(scales)
library(lubridate)

2 Data

Code
# Shared cleaning step applied to every downloaded table:
# removes the `timestamp` column, then parses `date` with lubridate::ymd().
# The argument is named `value` to match the formal of the magrittr
# functional sequence this replaces.
clean_data <- function(value) {
  without_timestamp <- select(value, -timestamp)
  mutate(without_timestamp, date = ymd(date))
}
Code
# Folder holding the TidyTuesday 2022-11-15 CSV files.
tt_base_url <- paste0(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
  "master/data/2022/2022-11-15/"
)

# Download one CSV from the TidyTuesday folder and run it through clean_data().
#
# name: file name without the ".csv" extension (e.g. "speed_index").
# Returns whatever clean_data() returns for the parsed file.
read_metric <- function(name) {
  readr::read_csv(paste0(tt_base_url, name, ".csv")) %>%
    clean_data()
}

image_alt <- read_metric("image_alt")
color_contrast <- read_metric("color_contrast")
# The published file is spelled "ally_scores", so that spelling is kept.
ally_scores <- read_metric("ally_scores")
bytes_total <- read_metric("bytes_total")
speed_index <- read_metric("speed_index")
Code
# Preview the first rows of the speed index table.
speed_index %>%
  head()
# A tibble: 6 × 8
  measure    client  date         p10   p25   p50   p75   p90
  <chr>      <chr>   <date>     <dbl> <dbl> <dbl> <dbl> <dbl>
1 speedIndex desktop 2022-10-01  1.59  2.42  3.88  6.45  10.5
2 speedIndex mobile  2022-10-01  2.92  4.03  5.87  8.85  13.3
3 speedIndex desktop 2022-09-01  1.61  2.45  3.91  6.5   10.6
4 speedIndex mobile  2022-09-01  2.92  4.04  5.88  8.86  13.3
5 speedIndex desktop 2022-08-01  1.62  2.48  3.96  6.56  10.6
6 speedIndex mobile  2022-08-01  2.98  4.16  6.12  9.37  14.2
Code
# Number of rows and columns in the speed index table.
c(nrow(speed_index), ncol(speed_index))
[1] 238   8

2.1 EDA

Code
# Line chart of `percent` over `date`, one coloured line per client.
ggplot(
  data = image_alt,
  mapping = aes(x = date, y = percent, colour = client)
) +
  geom_line() +
  labs(y = "% of images with alt text")

Code
# Stack the two percentage-based tables into one data frame.
combined_percentages <- list(image_alt, color_contrast) %>%
  bind_rows()

# One panel per measure; `percent` is divided by 100 before being labelled
# with percent_format().
ggplot(
  combined_percentages,
  aes(x = date, y = percent / 100, colour = client)
) +
  geom_line() +
  facet_wrap(vars(measure)) +
  scale_y_continuous(labels = percent_format()) +
  labs(y = "Percentage")

Code
# Stack the three percentile-based tables into one data frame.
combined_percentiles <- bind_rows(speed_index,
                                  bytes_total,
                                  ally_scores)

# Median (p50) line with a p25-p75 band, one free-scaled panel per measure.
combined_percentiles %>%
  ggplot(aes(date, p50, color = client)) +
  # Fill the band by client so the two clients' ranges can be told apart;
  # color = NA drops the outline that would otherwise be inherited from the
  # global `color = client` mapping. The ribbon is drawn first so the median
  # line stays visible on top of it.
  geom_ribbon(aes(ymin = p25, ymax = p75, fill = client),
              alpha = .25,
              color = NA) +
  geom_line() +
  facet_wrap(~ measure, scales = "free") +
  # Giving both legends the same title lets ggplot2 merge them into one.
  labs(y = "Median (with 25th-75th percentile)",
       color = "Client",
       fill = "Client")

3 References

https://www.youtube.com/watch?v=CRlbkBKI5iU

https://github.com/dgrtwo/data-screencasts/blob/master/2022_11_15_web_page_metrics.Rmd

https://github.com/rfordatascience/tidytuesday/tree/master/data/2022/2022-11-15