## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  eval = FALSE,
  collapse = TRUE,
  comment = "#>"
)

## ----setup, echo=FALSE, warning=FALSE, error=FALSE, message=FALSE-------------
# 
# # https://www.gov.uk/government/statistics/reported-road-casualties-great-britain-pedestrian-factsheet-2023/reported-road-casualties-in-great-britain-pedestrian-factsheet-2023
# 
# #library(stats19)
# library(stats19)
# library(sf)
# library(dplyr)
# library(lubridate)
# library(reshape2)
# library(ggplot2)
# library(knitr)
# library(readODS)
# library(gt)
# library(clock)
# library(stringr)
# library(tidyr)
# 
# # define plot dimensions
# knitr::opts_chunk$set(
#   out.width = "100%"  # scale relative to text width
# )
# 
#   # what casualty is the report for? options Pedestrian, Cyclist, escooters
#   report_casualty <- "Pedestrian"
# 
#   # stats19 usually updated in September, so if it is October last years data should be there
#   yr2calc <- 2024
# 
#   # request collision data (entering 2004 results in a table with all years)
#   crashes = get_stats19(year = "2004", type = "collision", ask = FALSE, format = TRUE, output_format = "data.frame") |>
#     filter(collision_year >= 2004)
# 
#   # import the adjusted casualty data ready to join to the original
#   #adj <- get_stats19_adjustments()
# 
# 
#   ## request casualty
#   casualties = get_stats19(year = "2004", type = "casualty", ask = FALSE, format = TRUE, output_format = "data.frame") |>
#     filter(collision_year >= 2004) |>
#     mutate(fatal_count = if_else(casualty_severity == "Fatal", 1, 0)) # add a column for fatal tally to enable same method to be used for serious and slight
# 
#   ## request vehicle
#   vehicles = get_stats19(year = "2004", type = "vehicle", ask = FALSE, format = TRUE, output_format = "data.frame") |>
#     filter(collision_year >= 2004)
# 
# 
# # get population data from https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates
#   uk_pop <- read.csv("https://www.ons.gov.uk/file?uri=/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/populationestimatestimeseriesdataset/current/pop.csv", skip = 7)
#   uk_pop <- uk_pop[,c(1,6)]
#   names(uk_pop) <- c("Year", "Population")
# 
# 
#   # get trip data NTS0303 https://www.gov.uk/government/statistical-data-sets/nts03-modal-comparisons
#   download.file("https://assets.publishing.service.gov.uk/media/66ce0f118e33f28aae7e1f75/nts0303.ods", destfile = "nts0303.ods", mode = "wb")
#   # dataset links to this sheet, but it is total distance and not dissagregated by travel mode
#   #download.file("https://assets.publishing.service.gov.uk/media/66ce0e818e33f28aae7e1f71/nts0101.ods", destfile = "nts0101.ods", mode = "wb")
# 
# 
#   # read in trip data
#   trips <- read_ods("nts0303.ods", sheet = "NTS0303c_miles", skip = 5) |>
#     left_join(uk_pop, by = "Year") |>
#     mutate(tot_dist_billion_miles = (`Walk [notes 2, 3]`*Population)/10^9)
# 
#   # speed up debugging by saving these key dfs locally
# #   save(casualties,crashes,vehicles, trips, file = "all_years.RData")
# 
#   #load("all_years.RData")
# 
#     # # most of the data is based on the last 5 years, speed up calcs by creating df for this
# #crashes$number_of_casualties <- as.numeric(crashes$number_of_casualties)
#   cra_L5Y <- filter(crashes, collision_year <= yr2calc & collision_year >= yr2calc-4)
#   cas_L5Y <- filter(casualties, collision_year <= yr2calc & collision_year >= yr2calc-4)
#   veh_L5Y <- filter(vehicles, collision_year <= yr2calc & collision_year >= yr2calc-4)
# #
# #
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # pick out data only for 2004
# fat_cas_2004 <- casualties |> filter(collision_year == "2004" & casualty_severity == "Fatal" & casualty_class == report_casualty)
# #casualties this year (TY)
# fat_cas_TY <- casualties |> filter(collision_year == yr2calc & casualty_severity == "Fatal" & casualty_class == report_casualty)
# 
# if(NROW(fat_cas_2004)>NROW(fat_cas_TY)){
#   ud <- "down"
#   fat_cas_diff <- round((1-NROW(fat_cas_TY)/NROW(fat_cas_2004))*100)
# } else {
#   ud <- "up"
#   fat_cas_diff <- round((1-NROW(fat_cas_2004)/NROW(fat_cas_TY))*100)
# }
# 
# ser_cas_2004 <- casualties |> filter(collision_year == "2004" & casualty_class == report_casualty)
# #casualties this year (TY)
# ser_cas_TY <- casualties |> filter(collision_year == yr2calc & casualty_class == report_casualty)
# 
# if(NROW(ser_cas_2004)>NROW(ser_cas_TY)){
#   id <- "decreased"
#   ser_cas_diff <-(((sum(ser_cas_2004$casualty_adjusted_severity_serious, na.rm = TRUE)-sum(ser_cas_TY$casualty_adjusted_severity_serious,na.rm = TRUE))/sum(ser_cas_2004$casualty_adjusted_severity_serious, na.rm = TRUE))*100)
# } else {
#   id <- "increased"
#   ser_cas_diff <- round((1-sum(ser_cas_2004$casualties,na.rm = TRUE)/sum(ser_cas_TY$casualties))*100)
# }
# 
# dist_walked_2004 <- filter(trips, Year == "2004")
# dist_walked_TY <- filter(trips, Year == yr2calc)
# 
# if(dist_walked_TY$tot_dist_billion_miles<dist_walked_2004$tot_dist_billion_miles){
#   ped_miles_id <- "decreased"
#   ped_traf <- (dist_walked_2004$tot_dist_billion_miles/dist_walked_TY$tot_dist_billion_miles)-1
# } else {
#   ped_miles_id <- "increased"
#   ped_traf <- (dist_walked_TY$tot_dist_billion_miles/dist_walked_2004$tot_dist_billion_miles)-1
# }
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# crash_cas <- inner_join(cas_L5Y,cra_L5Y) |>
#   filter(casualty_type == report_casualty)
# 
# # create column for weeks
# dths_per_wk_fat <- crash_cas |>
#   filter(casualty_severity == "Fatal" & casualty_class == report_casualty) |>  ## pick out casualty the stats will focus on
#   mutate(wk = isoweek(date),## determine the week number of each date
#          yr = year(date)) |>  ## add year so we can include all weeks over the 5 years
#   group_by(wk,yr) |>
#   summarise(casualties = sum(fatal_count))
# 
# # create column for weeks
# dths_per_wk_ser <- crash_cas %>%
#   mutate(wk = isoweek(date),# determine the week number of each date
#          yr = year(date)) %>% ## add year so all weeks over the 5 years are included
#   filter(casualty_class == report_casualty) |>  ## pick out casualty the stats will focus on
#   group_by(wk,yr) %>%
#   summarise(casualties = sum(casualty_adjusted_severity_serious,na.rm = TRUE))
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# cas_summary <- cas_L5Y |>
#   select(collision_index, casualty_class, fatal_count, casualty_adjusted_severity_serious, casualty_adjusted_severity_slight) |>
#   filter(casualty_class == report_casualty) |>
#   group_by(collision_index, casualty_class) |>
#    summarise(Fatal = sum(fatal_count),
#             Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE),
#             Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |>
#   tidyr::pivot_wider(names_from = "casualty_class", values_from = c("Fatal","Serious","Slight"))
# 
# if(yr2calc > 2023){
# 
# junction_pc <- cas_summary |>
#   left_join(cra_L5Y) |>
#   group_by(junction_detail) |>
#   summarise(fatal_casualties = sum(Fatal_Pedestrian),
#             serious_casualties = sum(Serious_Pedestrian,na.rm = TRUE),
#             slight_casualties = sum(Slight_Pedestrian,na.rm = TRUE)) |>
#   rowwise() |>
#   mutate(All = sum(fatal_casualties, serious_casualties, slight_casualties)) |>
#   ungroup() |>
#   transmute(Junction = junction_detail,
#             Fatalities = fatal_casualties/sum(fatal_casualties)*100,
#          Serious = serious_casualties/sum(serious_casualties)*100,
#          Slight = slight_casualties/sum(slight_casualties)*100,
#          All = All/sum(All)*100) |>
#   mutate_if(is.numeric, round,1) |>
#   arrange(desc(All))
# } else {
#   junction_pc <- cas_summary |>
#   left_join(cra_L5Y) |>
#   group_by(junction_detail_historic) |>
#   summarise(fatal_casualties = sum(Fatal_Pedestrian),
#             serious_casualties = sum(Serious_Pedestrian,na.rm = TRUE),
#             slight_casualties = sum(Slight_Pedestrian,na.rm = TRUE)) |>
#   rowwise() |>
#   mutate(All = sum(fatal_casualties, serious_casualties, slight_casualties)) |>
#   ungroup() |>
#   transmute(Junction = junction_detail_historic,
#             Fatalities = fatal_casualties/sum(fatal_casualties)*100,
#          Serious = serious_casualties/sum(serious_casualties)*100,
#          Slight = slight_casualties/sum(slight_casualties)*100,
#          All = All/sum(All)*100) |>
#   mutate_if(is.numeric, round,1) |>
#   arrange(desc(All))
# }
# 
# ## stats for within 20m of junctions
# not_within_20 <- junction_pc %>%
#   filter(Junction == "Not at junction or within 20 metres")
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# 
# ## create some approximate groups
# vehicle_groups <- data.frame(summary_group = c("pedal cycle", "motorcycle", "motorcycle", "motorcycle", "motorcycle", "motorcycle",
#                                                "motorcycle", "car", "other vehicle", "other vehicle", "car", "bus or coach", "car", "light goods vehicle", "heavy goods vehicle", "heavy goods vehicle",
#                                                "other vehicle", "other vehicle", "other vehicle", "other vehicle", "other vehicle", "other vehicle"),
#                              vehicle_type =  c("Pedal cycle","Motorcycle - unknown cc", "Electric motorcycle" , "Motorcycle 125cc and under",
#                                                  "Motorcycle 50cc and under","Motorcycle over 125cc and up to 500cc", "Motorcycle over 500cc",
#                                                  "Car", "Agricultural vehicle", "Tram","Taxi/Private hire car",
#                                                  "Bus or coach (17 or more pass seats)", "Minibus (8 - 16 passenger seats)", "Van / Goods 3.5 tonnes mgw or under",
#                                                  "Goods over 3.5t. and under 7.5t", "Goods 7.5 tonnes mgw and over", "Other vehicle", "Unknown vehicle type (self rep only)",
#                                                "Goods vehicle - unknown weight", "Mobility scooter", "Ridden horse", "Data missing or out of range"))
# 
# 
# # join the casualty summary table with crashes and vehicles
# veh_cas_sum <- cas_summary |>
#   left_join(veh_L5Y, by = "collision_index") |> # join the vehicles to get data on type of vehicle
#   left_join(cra_L5Y, by = "collision_index") |> # join crashes as number of vehicles is included, quicker than calculating from veh table
#   select(collision_index, vehicle_type, number_of_vehicles, Fatal_Pedestrian, Serious_Pedestrian, Slight_Pedestrian) |>
#   left_join(vehicle_groups, by = "vehicle_type") |>
#   distinct(collision_index, .keep_all = TRUE) # there is a row for each vehicle in a collision, get rid of duplicates
# 
# 
# all_vehicles <- veh_cas_sum |>
#   group_by(number_of_vehicles, summary_group) |>
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   rowwise() |>
#   mutate(All = sum(Fatal,Serious,Slight)) |>
#   ungroup() |>
#   mutate(pc_fat = round(Fatal/sum(Fatal)*100,1)) |>
#   arrange(desc(Fatal))
# 
# single_car <- filter(all_vehicles, number_of_vehicles == 1 & summary_group == "car")
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# road_type <- cas_summary |>
#   left_join(cra_L5Y, by = "collision_index") |>
#   #select(collision_severity, casualty_type, datetime, first_road_class, urban_or_rural_area, number_of_casualties,collision_reference) %>%
#   mutate(first_road_class = case_when(first_road_class == "A" ~ "Other",first_road_class == "B" ~ "Other",first_road_class == "C" ~ "Other",
#                          first_road_class == "Unclassified" ~ "Other",first_road_class == "A(M)" ~ "Other",first_road_class == "Motorway" ~ "Motorway")) |>
#   group_by(first_road_class, urban_or_rural_area) %>%
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   filter(!urban_or_rural_area == "Unallocated")
# 
# m_way <- road_type %>%
#   filter(first_road_class == "Motorway") |>
#   select(road_class = first_road_class,
#          Fatal,
#          Serious,
#          Slight)
# 
# rural_urban <- road_type %>%
#   filter(first_road_class == "Other") |>
#   ungroup() |>
#   select(road_class = urban_or_rural_area,
#          Fatal,
#          Serious,
#          Slight)
# 
# 
# road_types <- rbind(m_way, rural_urban)|>
#   group_by(road_class) |>
#   summarise(Fatal = round(sum(Fatal)),
#             Serious = round(sum(Serious,na.rm = TRUE)),
#             Slight = round(sum(Slight,na.rm = TRUE))) |>
#   tidyr::pivot_longer(cols =  c("Fatal", "Serious", "Slight")) |>
#   group_by(road_class, name) %>%
#   summarise(value = sum(value)) |>
#   group_by(name) %>%
#   mutate(pc = value/sum(value)*100)
# 
# road_type_all <- rbind(m_way, rural_urban)|>
#   group_by(road_class) |>
#   summarise(Fatal = round(sum(Fatal)),
#             Serious = round(sum(Serious,na.rm = TRUE)),
#             Slight = round(sum(Slight,na.rm = TRUE))) |>
#   tidyr::pivot_longer(cols =  c("Fatal", "Serious", "Slight")) |>
#   group_by(road_class) %>%
#   summarise(value = sum(value)) %>%
#   mutate(pc = value/sum(value)*100) |>
#   mutate(name = "All casualties")
# 
# rural_all <-  filter(road_type_all, road_class == "Rural")
# 
# fatal_rural <- filter(road_types,road_class == "Rural" & name == "Fatal")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# ## male female
# sex_casualty <- cas_L5Y %>%
#   filter(casualty_type == "Pedestrian") %>%
#   group_by(sex_of_casualty) %>%
#   summarise(fatal_casualties = sum(fatal_count),
#             serious_casualties = sum(casualty_adjusted_severity_serious,na.rm = TRUE),
#             slight_casualties = sum(casualty_adjusted_severity_slight, na.rm = TRUE))
# 
# serious_fatal_male <- sex_casualty %>%
#   rowwise() |>
#   #filter(sex_of_casualty == "Male") |>
#   mutate(KSI = sum(fatal_casualties, serious_casualties)) |>
#   ungroup() |>
#   mutate(pc_ksi = KSI/sum(KSI))
# 
# pc_fatal_serious_male <- filter(serious_fatal_male, sex_of_casualty == "Male")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# ## only latest year (TY) in report
# fatal_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty, casualty_severity == "Fatal")
# 
# serious_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty)
# 
# slight_TY <- filter(casualties, collision_year == yr2calc, casualty_type == report_casualty)
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# trips <- select(trips, Year, Traffic = tot_dist_billion_miles)
# 
# table_1 <- casualties |>
#   filter(casualty_type == report_casualty & collision_year >= 2004 & collision_year <= yr2calc) |> # add in filter for calculating past years when later data is available
#   group_by(collision_year) |>
#   summarise(Fatal = sum(fatal_count),
#             Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE),
#             Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |>
#   select(collision_year,Fatal, Serious, Slight) |>
#   #tidyr::pivot_wider(names_from = "casualty_severity", values_from = "casualties") |>
#   rowwise() |>
#   mutate(All = sum(c(Fatal, Serious, Slight))) |>
#   left_join(trips, by = c("collision_year" = "Year"))
# 
# dist_walked_2004 <- filter(trips, Year == 2004)
# dist_walked_TY <- filter(trips, Year == yr2calc)
# 
# if(dist_walked_2004$Traffic > dist_walked_TY$Traffic){
#   dw <- "decreased"
# } else {
#   dw <- "increased"
# }
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# ## change between this year (TY) and last year (LY)
# 
# diff_fatal <- (table_1$Fatal[NROW(table_1)]-table_1$Fatal[NROW(table_1)-1])/table_1$Fatal[NROW(table_1)-1]
# 
# diff_trips <- abs(table_1$Traffic[NROW(table_1)]-table_1$Traffic[NROW(table_1)-1])/table_1$Traffic[NROW(table_1)-1]
# 
# # pedestrian fatalities increased or decreased
# if(table_1$Fatal[NROW(table_1)]>table_1$Fatal[NROW(table_1)-1]){
#   pf <- "increased"
# } else {
#   pf <- "decreased"
# }
# 
# # pedestrian casualties all severities increased or decreased
# if(table_1$All[NROW(table_1)]>table_1$All[NROW(table_1)-1]){
#   pcr <- "increased"
# } else {
#   pcr <- "fallen"
# }
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# bm_vals <- table_1 %>% filter(collision_year == 2004) %>% select(collision_year,Fatal, Serious, Slight, Traffic)
# 
# rates <- table_1 %>%
#   select(collision_year,Fatal, Serious,Slight,Traffic) |>
#   mutate(Fatal = Fatal/bm_vals$Fatal*100,
#          Serious = Serious/bm_vals$Serious*100,
#          Slight = Slight/bm_vals$Slight*100,
#          Traffic = Traffic/bm_vals$Traffic*100)
# 
# chart_1 <- rates |> melt("collision_year")
# 
# cols <- rev(c("#001a70", "#ff7733", "#1de9b6","#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# # put the elements in a list
# dft_theme <- list(cust_theme, scale_color_manual(values = cols))
# 
# chart_1 %>%
#   ggplot(aes(collision_year, value, color = variable)) +
#   geom_line(size = 2, alpha = .8) +
#   dft_theme+
#   theme(panel.background = element_blank(),
#         legend.position = "top",
#     legend.title = element_blank()) +
#   scale_x_continuous(expand = c(0, 0)) +
#   geom_hline(yintercept=100, linetype='dotted', col = 'black')+
#   ggtitle(paste0("Chart 1: Index of casualties by severity, GB: 2004 to ", yr2calc," (Index 2004=100)")) +
#   scale_x_continuous(name = NULL,
#     breaks = seq(2004, 2023, by = 2)  # Add more tick marks
#   ) +
#   labs(caption = "Source: Stats19")+
#   theme(panel.border = element_blank())
# 
# 
# # round all the values for the table print
# table_1_out <- table_1  |>
#    mutate(Fatal = round(Fatal),
#           Serious = round(Serious),
#           Slight = round(Slight),
#           All = round(All),
#           Traffic = round(Traffic,2))
# 
# 
# gt(table_1_out,auto_align = TRUE) |>
#   cols_width(collision_year ~px(60)) |>
#   cols_label(collision_year = md("**Year**"),
#              Fatal = md("**Killed**"),
#              Serious = md("**Serious**"),
#              Slight = md("**Slight**"),
#              All = md("**All**"),
#              Traffic = md("**Traffic**")) |>
#   tab_footnote(md("**Source: DfT STATS19, National Travel Survey and Office for National\nStatistics population data**")) |>
#   tab_header(
#     title = md(paste0("**Table 1: Number of reported pedestrian casualties by severity and traffic\n(pedestrian billion miles walked), GB: 2004 to ", yr2calc,"**"))) |>
#   tab_options(heading.align = "left",
#               column_labels.border.top.style = "none",
#               table.border.top.style = "none",
#               column_labels.border.bottom.style = "none",
#               column_labels.border.bottom.width = 1,
#               column_labels.border.bottom.color = "black",
#               table_body.border.top.style = "none",
#               table_body.border.bottom.color = "white",
#               heading.border.bottom.style = "none",
#               table.border.bottom.style = "none",) |>
#   tab_style(
#     style = cell_text(weight = "bold"),
#     locations = list(
#       cells_column_labels(columns = c(collision_year)),
#       cells_body(columns = c(collision_year))
#     )) |>
#   tab_style(
#     style = cell_fill(color = "white"),
#     locations = cells_body(columns = everything())
#   )
# 
# table_2 <- table_1 |>
#   transmute(collision_year,
#             Fatal = round(Fatal/Traffic),
#          Serious = round(Serious/Traffic),
#          Slight = round(Slight/Traffic),
#          All = round(All/Traffic))
# 
# bm_vals_2 <- table_2 %>% filter(collision_year == 2004) %>% select(collision_year,Fatal, Serious, Slight,All)
# 
# rates_2 <- table_2 %>%
#   mutate(Fatal = Fatal/bm_vals_2$Fatal*100,
#          Serious = Serious/bm_vals_2$Serious*100,
#          Slight = Slight/bm_vals_2$Slight*100,
#          All = All/bm_vals_2$All*100)
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # compare the last year to 2004 for all stats
# diff_all_2004 <- (1-(rates_2$All[NROW(rates_2)]/rates_2$All[1]))*100
# diff_fat_2004 <- (1-(rates_2$Fatal[NROW(rates_2)]/rates_2$Fatal[1]))*100
# diff_sev_2004 <- (1-(rates_2$Serious[NROW(rates_2)]/rates_2$Serious[1]))*100
# diff_sli_2004 <- (1-(rates_2$Slight[NROW(rates_2)]/rates_2$Slight[1]))*100
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# 
# 
# # pick out traffic
# chart_2 <- rates_2 |>
#   melt("collision_year") |>
#   filter(!variable == "All")
# 
# # define the colour palette
# cols <- rev(c("#ff7733", "#1de9b6","#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# # put the elements in a list
# dft_theme <- list(cust_theme, scale_color_manual(values = cols))
# 
# 
# 
# chart_2 %>%
#   ggplot(aes(collision_year, value, color = variable)) +
#   geom_line(size = 2, alpha = .8) +
#   dft_theme+
#   theme(panel.background = element_blank(),
#         legend.position = "top",
#     legend.title = element_blank()) +
#   scale_x_continuous(expand = c(0, 0)) +
#   geom_hline(yintercept=100, linetype='dotted', col = 'black')+
#   ggtitle(paste0("Chart 2: Index of casualties by severity, GB: 2004 to ", yr2calc," (Index 2004=100)")) +
#   scale_x_continuous(name = NULL,
#     breaks = seq(2004, 2023, by = 2)  # Add more tick marks
#   ) +
#   labs(caption = "Source: Stats19")
# 
# 
# 
# gt(table_2,auto_align = FALSE) |>
#   cols_label(collision_year = md("**Year**"),
#              Fatal = md("**Killed**"),
#              Serious = md("**Serious**"),
#              Slight = md("**Slight**"),
#              All = md("**All**")) |>
#   tab_header(
#     title = md(paste0("**Table 2: Casualty rates of pedestrian casualties by severity per billion miles walked, GB: 2004 to ",yr2calc,"**"))) |>
#   tab_options(heading.align = "left",
#               column_labels.border.top.style = "none",
#               table.border.top.style = "none",
#               column_labels.border.bottom.style = "none",
#               column_labels.border.bottom.width = 1,
#               column_labels.border.bottom.color = "#334422",
#               table_body.border.top.style = "none",
#               table_body.border.bottom.color = "white",
#               heading.border.bottom.style = "none",
#               table.border.bottom.style = "none") |>
#   tab_style(
#     style = cell_text(weight = "bold"),
#     locations = list(
#       cells_column_labels(columns = c(collision_year)),
#       cells_body(columns = c(collision_year))
#     ))
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # by sex and age
# # use raw casualty df as there are multiple rows per collision. Bin the ages into custom bins that match the document
# sac_all <- cas_L5Y %>%
#   filter(casualty_type == report_casualty) |>
#   mutate(age_band = cut(as.numeric(age_of_casualty), breaks=c(0,11,15,19,24,29,39,49,59,69,100),labels=c("0-11","12-15","16-19","20-24","25-29","30-39","40-49","50-59","60-69","70+"))) |>
#   group_by(sex_of_casualty, age_band) %>%
#   summarise(Fatal = sum(fatal_count),
#             Serious = sum(casualty_adjusted_severity_serious,na.rm = TRUE),
#             Slight = sum(casualty_adjusted_severity_slight,na.rm = TRUE)) |>
#     filter(!is.na(age_band)) |>
#   rowwise() |>
#   mutate(All = sum(Fatal,Serious))
# 
# mf <- sac_all |> group_by(sex_of_casualty) |> summarise(all = sum(All)) |> ungroup() |> mutate(pc = all/sum(all))
# 
# male_tot <- filter(mf, sex_of_casualty == "Male")
# female_tot <- filter(mf, sex_of_casualty == "Female")
# 
# male_times <- male_tot$all/female_tot$all
# 
# # age band 1
# ab1 <- "30-39"
# 
# # male female casualties for this age band
# sac_ab1 <- sac_all |>  filter(age_band == ab1)
# 
# 
# 
# ab2 <- "0-11"
# 
# # male female casualties for this age band
# sac_ab2 <- sac_all |>  filter(age_band == ab2)
# 
# ab3 <- "70+"
# 
# # male female casualties for this age band
# sac_ab3 <- sac_all |>  filter(age_band == ab3)
# 
# # add pc_ksi for only Male and Female
# sac_all <- sac_all |> ungroup() |> mutate(pc_ksi = (All/sum(All))*100) |> filter(sex_of_casualty %in% c("Male", "Female"))
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # Define colours and theme
# cols <- rev(c("#1de9b6", "#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# dft_theme <- list(cust_theme, scale_fill_manual(values = cols))  # use fill, not color
# 
# ggplot(sac_all, aes(x = age_band, y = pc_ksi, fill = sex_of_casualty)) +
#   geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
#   geom_text(
#     aes(label = paste0(round(pc_ksi),"%")),  # Round values to 1 decimal place
#     position = position_dodge(width = 0.7),
#     vjust = -0.5,
#     size = 3
#   ) +
#   ggtitle(paste0("Chart 3: Percentage of ", tolower(report_casualty), " KSI casualties, by sex and age, GB: ", yr2calc-4, " to ", yr2calc)) +
#   dft_theme +
#   theme(
#     panel.background = element_blank(),
#     legend.position = "top",
#     legend.title = element_blank()
#   ) +
#   ylab(NULL)+
#   xlab(NULL)+
#   labs(caption = "Source: Stats19")
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# 
# # split up different vehicle numbers into individual dfs and join later for the table
# single_vehicles <- veh_cas_sum |>
#   filter(number_of_vehicles == 1) |>
#   group_by(summary_group) |>
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   rowwise() |>
#   mutate(All = sum(Fatal,Serious,Slight)) |>
#   ungroup() |>
#   mutate(pc_fat = round(Fatal/All*100,1)) |>
#   mutate(summary_group = factor(summary_group, levels = c("pedal cycle","motorcycle",  "car","bus or coach","light goods vehicle",  "heavy goods vehicle", "other vehicle"))) |>
#   arrange(summary_group) |>
#   mutate(summary_group = paste(1,summary_group))
# 
# 
# two_vehicles <- veh_cas_sum |>
#   filter(number_of_vehicles == 2) |>
#   mutate(summary_group = "2 vehicles involved") |>
#   group_by(summary_group) |>
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   rowwise() |>
#   mutate(All = sum(Fatal,Serious,Slight)) |>
#   ungroup() |>
#   mutate(pc_fat = round(Fatal/All*100,1))
# 
# GT_two_vehicles <- veh_cas_sum |>
#   filter(number_of_vehicles > 2) |>
#   mutate(summary_group = "3 or more other vehicles involved") |>
#   group_by(summary_group) |>
#    summarise(Fatal = round(sum(Fatal_Pedestrian)),
#             Serious = round(sum(Serious_Pedestrian,na.rm = TRUE)),
#             Slight = round(sum(Slight_Pedestrian,na.rm = TRUE))) |>
#   rowwise() |>
#   mutate(All = round(sum(Fatal,Serious,Slight))) |>
#   ungroup() |>
#   mutate(pc_fat = round(Fatal/All*100,1))
# 
# # sort by percentage fatal for the text
# most_fat <- rbind(single_vehicles,two_vehicles,GT_two_vehicles) |>
#   arrange(desc(pc_fat))
# 
# # create a totals row
# totals <- rbind(single_vehicles,two_vehicles,GT_two_vehicles) |>
#   summarise(across(where(is.numeric), sum), group = "Total") |>
#    mutate(pc_fat = round(Fatal/All*100,1),
#           summary_group = group) |>
#   select(-group)
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # bind them all together for table 3
# table_3 <- rbind(single_vehicles,two_vehicles,GT_two_vehicles, totals)
# 
# # create table 3
# gt(table_3,auto_align = FALSE) |>
#   cols_label(summary_group = md("**Vehicles**"),
#              Fatal = md("**Fatalities**"),
#              Serious = md("**Serious injuries**"),
#              Slight = md("**Slight injuries**"),
#              All = md("**All casualties**"),
#              pc_fat = md("**% Fatalities**")) |>
#   tab_header(
#     title = md(paste0("**Table 3: Pedestrian casualties in reported road collisions by severity showing other vehicles involved GB: ", yr2calc-4, " to ",yr2calc,"**"))) |>
#   tab_options(heading.align = "left",
#               column_labels.border.top.style = "none",
#               table.border.top.style = "none",
#               column_labels.border.bottom.style = "none",
#               column_labels.border.bottom.width = 1,
#               column_labels.border.bottom.color = "#334422",
#               table_body.border.top.style = "none",
#               table_body.border.bottom.color = "white",
#               heading.border.bottom.style = "none",
#               table.border.bottom.style = "none") |>
#   tab_style(
#     style = cell_text(weight = "bold"),
#     locations = list(
#       cells_column_labels(columns = c(summary_group)),
#       cells_body(columns = c(summary_group))
#     ))
# 
# 
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# ## create a table of severity by year
# # used clock as much faster than lubridate
# crash_time <- cas_summary |>
#   left_join(cra_L5Y, by = "collision_index") |> # join crashes as number of vehicles is included, quicker than calculating from veh table
#   select(datetime, Fatal_Pedestrian, Serious_Pedestrian) |>
#   mutate(#collision_hr = lubridate::hour(datetime),
#          dow = clock::date_weekday_factor(datetime, abbreviate = FALSE),
#         collision_hr = get_hour(datetime),
#         KSI = sum(Fatal_Pedestrian, Serious_Pedestrian)) |>
#   mutate(dow = case_when(dow == "Monday" ~  "Monday to Friday",
#          dow == "Tuesday" ~  "Monday to Friday",
#          dow == "Wednesday" ~  "Monday to Friday",
#          dow == "Thursday" ~  "Monday to Friday",
#          dow == "Friday" ~  "Monday to Friday",
#          dow == "Saturday" ~  "Saturday",
#          dow == "Sunday" ~  "Sunday")) |>
#   #mutate(dow = case_when(dow > 1 & dow < 7 ~  "Monday to Friday", dow == 7 ~ "Saturday", dow == 1 ~ "Sunday")) |>
#   group_by(collision_hr, dow) |>
#     summarise(KSI = sum(KSI)) |>
#   mutate(KSI = if_else(dow == "Monday to Friday", KSI/5, KSI))
# 
# MF_peak <- crash_time |> filter(dow == "Monday to Friday") |> arrange(desc(KSI)) |> mutate(hr = str_sub(gsub(" ","", tolower(format(strptime(collision_hr, format = "%H"), "%I %p"))),2))
# 
# SS_peak <- crash_time |> filter(dow %in% c("Saturday", "Sunday")) |> group_by(collision_hr) |> summarise(KSI = sum(KSI)) |> arrange(desc(KSI)) |> mutate(hr = str_sub(gsub(" ","", tolower(format(strptime(collision_hr, format = "%H"), "%I %p"))),2))
# 
# # define the colour palette
# cols <- rev(c("#ff7733", "#1de9b6","#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# # put the elements in a list
# dft_theme <- list(cust_theme, scale_color_manual(values = cols))
# 
# crash_time %>%
#   ggplot(aes(collision_hr, KSI, color = dow)) +
#   geom_line(size = 2, alpha = .8) +
#   dft_theme+
#   theme(panel.background = element_blank(),
#         legend.position = "top", legend.title = element_blank()) +
#   scale_x_continuous(expand = c(0, 0)) +
#   ggtitle(paste0("Chart 4: Reported ", tolower(report_casualty), " KSIs by hour of day and day of week, GB: ", yr2calc-4, " to ", yr2calc)) +
#    ylab(NULL)+
#   labs(x = "Hour starting", caption = "Source: Stats19")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # see section 1 for original table
# 
# road_type_bar <- rbind(road_types, road_type_all) |>
#   filter(!road_class == "Data missing or out of range")
# 
# fatal_urban <- filter(road_type_bar, name == "Fatal" & road_class == "Urban")
# 
# all_cas <- filter(road_type_bar, name == "All casualties" & road_class == "Urban")
# 
# fatal_mway <- filter(road_type_bar, name == "Fatal" & road_class == "Motorway")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # define the colour palette
# cols <- rev(c("#ff7733", "#1de9b6","#006853"))
# cust_theme <- theme(panel.grid.major = element_line(size = 2))
# dft_theme <- list(cust_theme, scale_fill_manual(values = cols))  # use fill, not co
# 
# # Grouped bar
# ggplot(road_type_bar, aes(fill=road_class, y=pc, x=name, label = paste0(round(pc),"%"))) +
#   geom_bar(position="dodge", stat="identity") +
#   dft_theme +
#   theme(
#     panel.background = element_blank(),
#     legend.position = "top",
#     legend.title = element_blank()
#   ) +
#   theme(panel.background = element_blank()) +
#   geom_text(position = position_dodge2(width = 0.9, preserve = "single"), angle = 0, vjust=-0.5, hjust=0.5) +
#   xlab(NULL)+
#   ylab(NULL)+
#   ggtitle(paste0("Chart 5: Percentage of pedestrian casualties, by urban or rural classification and severity, GB: ", yr2calc-4, " to ", yr2calc)) +
#   labs(caption = "Source: Stats19")
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# # see section one
# 
# highest_fat <- junction_pc |> arrange(desc(Fatalities))
# highest_sev <- junction_pc |> arrange(desc(Serious))
# 
# if(yr2calc > 2023){
# 
# junc <- junction_pc |> filter(Junction %in% c("T or staggered junction", "Crossroads", "More than 4 arms (not roundabout)"))
# 
# } else {
# 
#   junc <- junction_pc |> filter(Junction %in% c("T or staggered junction", "Other junction", "Crossroads", "More than 4 arms (not roundabout)"))
# 
# roundabouts <- junction_pc |> filter(Junction %in% c("roundabout", "Mini-roundabout"))
# 
# }
# 
# 
# 

## ----echo = FALSE, warning=FALSE, message=FALSE-------------------------------
# 
# table_4 <- junction_pc |>
#   arrange(desc(All)) |>
#   mutate_if(is.numeric, round,1) |> # round all values to 1 dp
#   mutate_if(is.numeric, ~ paste0(.x, "%")) # add a % sign after each value
# 
# # create table 3
# gt(table_4,auto_align = FALSE) |>
#   cols_label(Junction = md("**Junction**"),
#              Fatalities = md("**Fatalities**"),
#              Serious = md("**Serious**"),
#              Slight = md("**Slight**"),
#              All = md("**All casualties**")) |>
#   tab_header(
#     title = md(paste0("**Table 4: Percentage of pedestrian KSI casualties by severity and junction detail where the collision occurred, GB: ", yr2calc-4, " to ",yr2calc,"**"))) |>
#   tab_options(heading.align = "left",
#               column_labels.border.top.style = "none",
#               table.border.top.style = "none",
#               column_labels.border.bottom.style = "none",
#               column_labels.border.bottom.width = 1,
#               column_labels.border.bottom.color = "#334422",
#               table_body.border.top.style = "none",
#               table_body.border.bottom.color = "white",
#               heading.border.bottom.style = "none",
#               table.border.bottom.style = "none") |>
#   tab_style(
#     style = cell_text(weight = "bold"),
#     locations = list(
#       cells_column_labels(columns = c(Junction)),
#       cells_body(columns = c(Junction))
#     ))
# 
#