Beitbridge GPS Exploration

Author

Sparkgeo

library(readr)
library(dplyr)
library(ggplot2)
library(skimr)
library(lubridate)
library(tidymodels)
library(tidyverse)

This notebook is exploring the Beitbridge GPS data with plots and limited statistics. Some of these plots are throw-away.

beitbridge_border = read_csv("../data/processed/Beitbridge_Counts_Wait_Time_2018_2022.csv")
beitbridge_border %>%
  ggplot(aes(x = Count_Events, y = Median_Minutes, group = Direction, col = Direction)) + 
  geom_point() +
  scale_color_viridis_d(alpha=0.3,option = "plasma", end = .7) +
  labs(x = "Count", y = "Median Minutes")

beitbridge_border %>%
  ggplot(aes(x = Count_Events, y = Median_Minutes, group= as_factor(StartHour), col = as_factor(StartHour))) + 
  geom_point(alpha = 0.7) +
  scale_color_viridis_d("Hour", option = "plasma", end = .7, alpha =0.2)+
  labs(x = "Count", y = "Median Minutes")

Subsets

SA - Zimbabwe

sa_zimbabwe = beitbridge_border %>% filter(Direction == "SA-Zimbabwe")

Monthly Count

 monthly_count_sa = sa_zimbabwe %>% group_by(year=year(StartDate), month=month(StartDate, label=TRUE)) %>% summarize(Month_Total = sum(Count_Events))
monthly_count_sa %>%
  ggplot(aes(x=month, y=Month_Total, group=as_factor(year), colour=as_factor(year))) +
  geom_line()+
  labs(y = 'Count of Vehicles Processed', x= "Month", caption = "For Vehicles with GPS*", colour ="Year")+
  scale_color_viridis_d(option = "magma", end = .7)

We’ll start with the South Africa –> Zimbabwe direction. We can tell from the plot above there seems to be higher counts within hour 0.

We can filter and slice our graph by the hour. Let’s look at 0.

sa_zimbabwe %>%
  filter(StartHour == 0 )%>%
  ggplot(aes(x = Count_Events, y = Median_Minutes, group= as_factor(StartHour), col = as_factor(StartHour))) + 
  geom_point(alpha = 0.7) +
  scale_color_viridis_d("Hour",option = "plasma", end = .7)+
  labs(x = "Count", y = "Median Minutes")

sa_zimbabwe %>%
  filter(StartHour == 0 )%>%
  ggplot(aes(x = Count_Events, y = Median_Minutes, group = as_factor(year(StartDate)), col = as_factor(year(StartDate)))) +
  geom_point(alpha = 0.7) +
  scale_color_viridis_d("Year",option = "plasma", end = .7)+
  labs(x = "Count", y = "Median Minutes")

Violin plot zoomed in by start hour 0 and Year.

sa_zimbabwe %>%
  filter(StartHour == 0 )%>%
  ggplot(aes(x = as_factor(year(StartDate)), y = Median_Minutes)) +
  geom_violin(alpha = 0.7) +
  scale_color_viridis_d("Year",option = "plasma", end = .7)+
  labs(x = "Year", y = "Median Minutes")

sa_zimbabwe %>%
  filter(StartHour == 0 )%>%
  ggplot(aes(x = Count_Events, y = Median_Minutes, group = as_factor(wday(StartDate, TRUE)), col = as_factor(wday(StartDate, TRUE)))) +
  geom_point(alpha = 0.7) +
  scale_color_viridis_d("Day of Week",option = "plasma", end = .7)+
  labs(x = "Count", y = "Median Minutes")

sa_zimbabwe %>%
  filter(StartHour == 0 )%>%
  ggplot(aes(x = Median_Minutes, y = as_factor(wday(StartDate, TRUE)), group = as_factor(wday(StartDate, TRUE)), col = as_factor(wday(StartDate, TRUE)))) +
  geom_boxplot(alpha = 0.7) +
  scale_color_viridis_d("Day of Week",option = "plasma", end = .7)+
  labs(x = "Count", y = "Median Minutes")

Day of Week

Let’s look at day of week across the study period.

sa_zimbabwe %>%
  ggplot(aes(x = Count_Events, y = Median_Minutes, group= as_factor(wday(StartDate, TRUE)), col = as_factor(wday(StartDate, TRUE)))) + 
  geom_point(alpha = 0.7) +
  scale_color_viridis_d("Day of Week", option = "plasma", end = .7, alpha =0.5)+
  labs(x = "Count", y = "Median Minutes")

Zimbabwe - SA

zimbabwe_sa = beitbridge_border %>% filter(Direction == "Zimbabwe-SA")
zimbabwe_sa %>%
  filter(year(StartDate)=='2019')%>%
  ggplot(aes(x = Count_Events, y = Median_Minutes, group = as_factor(year(StartDate)), col = as_factor(year(StartDate)))) + 
  geom_point(alpha = 0.7) +
   # geom_smooth(method = lm, se = FALSE) +
  scale_color_viridis_d("Year", option = "plasma", end = .7)+
  labs(x = "Count", y = "Median Minutes")

Convert to long format for plotting

zim_long = zimbabwe_sa %>%
  select(-Direction)%>%
  pivot_longer(cols = c(Median_Minutes, Bottom_10__Minutes, Top_10__Minutes), values_to = 'minutes')
zim_long %>%
  filter(StartDate == '2018-01-01' & Count_Events > 0)%>%
  mutate(name = str_replace(name,"_"," "))%>%
   mutate(name = str_replace(name,"__"," "))%>%
  ggplot(aes(x = StartHour, y = minutes, group = name, col = name))+
  geom_point()+
  geom_line() +
  labs(x = "Hour of Day", y = "Minutes", col = "Legend")

 zimbabwe_sa %>%
  filter(StartDate == '2022-12-30')%>%
  ggplot(aes(x = StartHour, y = Median_Minutes), colour = 'blue')+
  geom_point()+
  geom_line(colour = 'blue')+
  geom_ribbon(aes(ymin = Bottom_10__Minutes, ymax = Top_10__Minutes), fill = 'light blue', alpha = 0.4)+
  labs(x = "Hour", y="Median Minutes")

Daily

 zimbabwe_sa %>% group_by(StartDate) %>% summarize(Day_Total = sum(Count_Events))%>%ggplot(aes(x=StartDate, y=Day_Total))+geom_bar(stat = "identity")

Monthly

 monthly_count = zimbabwe_sa %>% group_by(year=year(StartDate), month=month(StartDate, label=TRUE)) %>% summarize(Month_Total = sum(Count_Events))
monthly_count %>%
  ggplot(aes(x=month, y=Month_Total, group=as_factor(year), colour=as_factor(year))) +
  geom_line()+
  labs(y = 'Count of Vehicles Processed', x= "Month", caption = "For Vehicles with GPS*", colour ="Year")+
  scale_color_viridis_d(option = "plasma", end = .7)

A clear difference during Covid and lock-downs when aggregated at a monthly scale.