Code
library(tidyverse)
library(ggplot2)
library(tidymodels)
library(rsample)
library(themis)
library(tidytuesdayR)
library(scales)
library(lubridate)
Tony Duan
January 13, 2024

# Download the five TidyTuesday (2022-11-15) CSV files; each one is piped
# through clean_data() right after it is read.
image_alt <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/image_alt.csv') %>%
  clean_data()
color_contrast <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/color_contrast.csv') %>%
  clean_data()
ally_scores <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/ally_scores.csv') %>%
  clean_data()
bytes_total <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/bytes_total.csv') %>%
  clean_data()
speed_index <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/speed_index.csv') %>%
  clean_data()
# A tibble: 6 × 8
measure client date p10 p25 p50 p75 p90
<chr> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 speedIndex desktop 2022-10-01 1.59 2.42 3.88 6.45 10.5
2 speedIndex mobile 2022-10-01 2.92 4.03 5.87 8.85 13.3
3 speedIndex desktop 2022-09-01 1.61 2.45 3.91 6.5 10.6
4 speedIndex mobile 2022-09-01 2.92 4.04 5.88 8.86 13.3
5 speedIndex desktop 2022-08-01 1.62 2.48 3.96 6.56 10.6
6 speedIndex mobile 2022-08-01 2.98 4.16 6.12 9.37 14.2


# Stack the three tables into a single data frame.
combined_percentiles <- bind_rows(speed_index,
                                  bytes_total,
                                  ally_scores)
# Plot p50 over time, coloured by client, with a p25-p75 ribbon drawn at
# 25% opacity. One facet per measure, each with its own axis scales.
combined_percentiles %>%
  ggplot(aes(date, p50, color = client)) +
  geom_line() +
  geom_ribbon(aes(ymin = p25, ymax = p75), alpha = .25) +
  facet_wrap(~ measure, scales = "free") +
  labs(y = "Median (with 25th-75th percentile)",
       color = "Client")
https://www.youtube.com/watch?v=CRlbkBKI5iU
https://github.com/dgrtwo/data-screencasts/blob/master/2022_11_15_web_page_metrics.Rmd
https://github.com/rfordatascience/tidytuesday/tree/master/data/2022/2022-11-15
---
title: "Tidy Tuesday 2 : Analyzing web page metrics in R"
author: "Tony Duan"
date: "2024-01-13"
categories: [R,TidyTuesday]
execute:
  warning: false
  error: false
format:
  html:
    toc: true
    toc-location: left
    code-fold: show
    code-tools: true
    number-sections: true
    code-block-bg: true
    code-block-border-left: "#31BAE9"
---
{width="600"}
# package
```{r}
library(tidyverse)
library(ggplot2)
library(tidymodels)
library(rsample)
library(themis)
library(tidytuesdayR)
library(scales)
library(lubridate)
```
# data
```{r}
# Shared cleaning step applied to every downloaded table: drop the
# `timestamp` column and convert `date` with lubridate's ymd().
# The argument is named `value`, matching the signature of a magrittr
# functional sequence (`. %>% ...`).
clean_data <- function(value) {
  value %>%
    select(-timestamp) %>%
    mutate(date = ymd(date))
}
```
```{r}
# Read one of the 2022-11-15 TidyTuesday CSV files by its base name and
# run it through clean_data() (defined in the chunk above).
#
# `metric` is the file name without the ".csv" extension, e.g. "image_alt".
# The URL prefix is kept in one place so the five downloads below cannot
# drift apart.
read_metric <- function(metric) {
  base_url <- paste0(
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/",
    "master/data/2022/2022-11-15/"
  )
  readr::read_csv(paste0(base_url, metric, ".csv")) %>%
    clean_data()
}

image_alt <- read_metric("image_alt")
color_contrast <- read_metric("color_contrast")
ally_scores <- read_metric("ally_scores")
bytes_total <- read_metric("bytes_total")
speed_index <- read_metric("speed_index")
```
```{r}
# Preview the first rows of the speed index table.
speed_index %>%
  head()
```
```{r}
# Number of rows and columns in the speed index table.
speed_index %>%
  dim()
```
## EDA
```{r}
# `percent` over time from the image_alt table, one coloured line per client.
ggplot(image_alt, aes(x = date, y = percent, color = client)) +
  geom_line() +
  labs(y = "% of images with alt text")
```
```{r}
# Stack the two percentage-based tables so they can share one plot.
combined_percentages <- bind_rows(image_alt, color_contrast)

# `percent` is divided by 100 because percent_format() multiplies by 100
# by default when building the axis labels. One facet per measure.
ggplot(
  combined_percentages,
  aes(x = date, y = percent / 100, color = client)
) +
  geom_line() +
  scale_y_continuous(labels = percent_format()) +
  labs(y = "Percentage") +
  facet_wrap(~ measure)
```
```{r}
# Stack the three tables into one data frame for a shared plot.
combined_percentiles <- bind_rows(speed_index, bytes_total, ally_scores)

# Line = p50 per client; ribbon = p25 to p75 at 25% opacity.
# Each measure gets its own facet with free axis scales.
ggplot(combined_percentiles, aes(x = date, y = p50, color = client)) +
  geom_line() +
  geom_ribbon(aes(ymin = p25, ymax = p75), alpha = .25) +
  facet_wrap(~ measure, scales = "free") +
  labs(
    y = "Median (with 25th-75th percentile)",
    color = "Client"
  )
```
# Reference
- <https://www.youtube.com/watch?v=CRlbkBKI5iU>
- <https://github.com/dgrtwo/data-screencasts/blob/master/2022_11_15_web_page_metrics.Rmd>
- <https://github.com/rfordatascience/tidytuesday/tree/master/data/2022/2022-11-15>