library(readr)
library(dplyr)
library(lubridate)
library(tidyverse)
library(stringr)
Extract Important Dates
This notebook extracts date ranges from both the border wait-time data and the list of important dates for the Beitbridge border crossing. The important-dates data set was compiled by searching news articles, YouTube, and other sources for reports of delays at the border crossing.
# Load the dates of interest and the processed border counts / wait times.
# Both paths are relative, so the working directory must sit one level below
# the project root.
important_dates <- read_csv("../data/raw/beitbridge_dates_of_interest.csv")

border_data <- read_csv(
  "../data/processed/Beitbridge_Counts_Wait_Time_2018_2022.csv"
)
Format the important dates data to have start and end dates. Date ranges are split on the arrow symbol (→), which is an artifact of the Notion database export; entries with a single date use that date as both the start and the end.
# Derive start_date / end_date from the free-text Date column.
# Entries containing an arrow ("start → end") are split into two pieces;
# entries without one use the single date for both columns.
# The column is referenced through the data mask (Date) rather than
# important_dates$Date so the expression always operates on the rows that are
# actually flowing through the pipe.
important_dates <- important_dates %>%
  mutate(
    start_date = mdy(if_else(
      str_detect(Date, "→"),
      str_split(Date, "→", n = 2, simplify = TRUE)[, 1],
      Date
    )),
    end_date = mdy(if_else(
      str_detect(Date, "→"),
      str_split(Date, "→", n = 2, simplify = TRUE)[, 2],
      Date
    ))
  )
Format the border crossing dates as date-times by combining the start date and the start hour.
# Combine the date and hour columns into a single date-time column
# (parsed as year-month-day hour) for the hourly plots.
border_data$datetime <- ymd_h(
  paste(border_data$StartDate, border_data$StartHour)
)
Function to subset the border data to a given date range.
# Select the rows of a border data set whose StartDate falls in a date range.
#
# Arguments:
#   start_date  First day of the range, in a form ymd() can parse
#               (e.g. "2022-12-24").
#   end_date    Last day of the range, same format as start_date.
#   data        Data frame with a StartDate column.
#
# Returns the rows of `data` with StartDate inside the interval. Passing the
# same value for start_date and end_date selects a single day.
get_specific_dates <- function(start_date, end_date, data) {
  date_range <- interval(ymd(start_date), ymd(end_date))

  data %>%
    filter(StartDate %within% date_range)
}
Apply the function to our border data. Here we use the dates on which we know there was an event and for which we have high-resolution imagery.
# Get December 24, 2022: the hourly border records for that single day.
df <- get_specific_dates("2022-12-24", "2022-12-24", border_data)

# Look up the recorded events whose start date falls on the same day.
start_date <- "2022-12-24"
end_date <- "2022-12-24"
int <- interval(ymd(start_date), ymd(end_date))

# `.data$start_date` makes explicit that the filter tests the start_date
# COLUMN of important_dates, not the string variable of the same name above.
important_dates %>%
  filter(.data$start_date %within% int)
Name | Tags | Date | Description | URL | start_date | end_date |
---|---|---|---|---|---|---|
Large queues at Beitbridge due to immigration of 20 000+ people | delay | December 24, 2022 | NA | https://www.sabcnews.com/sabcnews/more-than-20-000-immigrants-cross-from-south-africa-into-zimbabwe-at-the-beitbridge-border-post/ | 2022-12-24 | 2022-12-24 |
Plots
# Scatter plot of median wait time against event count for the selected day,
# coloured by crossing direction.
df %>%
  ggplot(aes(
    x = Count_Events,
    y = Median_Minutes,
    group = Direction,
    col = Direction
  )) +
  geom_point() +
  scale_color_viridis_d(alpha = 0.3, option = "plasma", end = .7) +
  labs(
    y = " Median Minutes",
    x = "Total Count",
    title = "Border Wait Time and Counts",
    subtitle = "December 24th, 2022"
  )
# Median wait time by hour of day for the selected day, one line per direction.
df %>%
  ggplot(aes(
    x = StartHour,
    y = Median_Minutes,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  geom_line() +
  scale_color_viridis_d(option = "plasma", end = .7) +
  labs(
    x = "Hour of Day",
    y = "Median Minutes",
    col = "Direction",
    title = "December 24, 2022",
    caption = "*Note missing data"
  )
# Event count by hour for the selected day, one line per direction.
df %>%
  ggplot(aes(
    x = StartHour,
    y = Count_Events,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  geom_line() +
  scale_color_viridis_d(option = "plasma", end = .7) +
  labs(
    x = "Hour",
    y = "Count",
    col = "Direction",
    title = "December 24, 2022"
  )
Cumulative Sum
# Running total of event counts over the day, per direction.
# Rows are sorted by hour within each direction before cumsum() so the total
# accumulates in chronological order regardless of the input row order.
df %>%
  group_by(Direction) %>%
  arrange(StartHour, .by_group = TRUE) %>%
  mutate(cumulative_count = cumsum(Count_Events)) %>%
  ggplot(aes(
    x = StartHour,
    y = cumulative_count,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  geom_line() +
  scale_color_viridis_d(option = "plasma", end = .7) +
  labs(
    x = "Hour",
    y = "Cumulative Sum",
    col = "Direction",
    title = "December 24, 2022"
  )
# Running total of event counts over the day, per direction (this chunk
# repeats the preceding plot).
# Rows are sorted by hour within each direction before cumsum() so the total
# accumulates in chronological order regardless of the input row order.
df %>%
  group_by(Direction) %>%
  arrange(StartHour, .by_group = TRUE) %>%
  mutate(cumulative_count = cumsum(Count_Events)) %>%
  ggplot(aes(
    x = StartHour,
    y = cumulative_count,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  geom_line() +
  scale_color_viridis_d(option = "plasma", end = .7) +
  labs(
    x = "Hour",
    y = "Cumulative Sum",
    col = "Direction",
    title = "December 24, 2022"
  )
Month Plots
The plots below are looking at the month of December 2022.
# Cumulative daily event count across December 2022, per direction.
border_data %>%
  # year() returns a number, so compare against 2022 rather than "2022".
  filter(year(StartDate) == 2022, month(StartDate) == 12) %>%
  group_by(Direction, StartDate) %>%
  arrange(Direction, StartDate) %>%
  # "drop_last" keeps the result grouped by Direction, so the cumsum() below
  # restarts for each direction.
  summarize(
    daily_count = sum(Count_Events, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  mutate(cumulative_sum = cumsum(daily_count)) %>%
  ggplot(aes(
    x = StartDate,
    y = cumulative_sum,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  geom_line() +
  scale_color_viridis_d(option = "plasma", end = .7) +
  labs(
    x = "Date",
    y = "Cumulative Sum",
    col = "Direction",
    title = "December, 2022"
  )
# Highest hourly median wait time on each day of December 2022, per direction.
border_data %>%
  # year() returns a number, so compare against 2022 rather than "2022".
  filter(year(StartDate) == 2022, month(StartDate) == 12) %>%
  group_by(StartDate, Direction) %>%
  summarize(
    max_median = max(Median_Minutes, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  ggplot(aes(
    x = StartDate,
    y = max_median,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  geom_line() +
  scale_color_viridis_d(option = "plasma", end = .7) +
  # Label every second day with its day-of-month and pin the axis to the
  # full month.
  scale_x_date(
    date_labels = "%d",
    date_breaks = "2 days",
    limits = c(as_date("2022-12-01"), as_date("2022-12-31"))
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(
    x = "Day",
    y = "Max Daily Median Wait Time",
    col = "Direction",
    title = "December 2022"
  )
Hourly Scatter Plots
# Hourly median wait times in December 2022 for the SA-Zimbabwe direction.
border_data %>%
  filter(Direction == "SA-Zimbabwe") %>%
  # year() returns a number, so compare against 2022 rather than "2022".
  filter(year(StartDate) == 2022, month(StartDate) == 12) %>%
  ggplot(aes(
    x = datetime,
    y = Median_Minutes,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  # geom_line() +
  labs(
    x = "Day",
    y = "Median Hourly Wait Time",
    col = "Direction",
    title = "December 2022"
  ) +
  scale_color_viridis_d(option = "plasma", end = .7)
# Hourly median wait times in December 2022 for the Zimbabwe-SA direction.
border_data %>%
  filter(Direction == "Zimbabwe-SA") %>%
  # year() returns a number, so compare against 2022 rather than "2022".
  filter(year(StartDate) == 2022, month(StartDate) == 12) %>%
  ggplot(aes(
    x = datetime,
    y = Median_Minutes,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  # geom_line() +
  labs(
    x = "Day",
    y = "Median Hourly Wait Time",
    col = "Direction",
    title = "December 2022"
  ) +
  scale_color_viridis_d(option = "magma", end = .7)
# Total event count on each day of December 2022, per direction.
border_data %>%
  # year() returns a number, so compare against 2022 rather than "2022".
  filter(year(StartDate) == 2022, month(StartDate) == 12) %>%
  group_by(StartDate, Direction) %>%
  summarize(
    Total_Count = sum(Count_Events, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  ggplot(aes(
    x = StartDate,
    y = Total_Count,
    group = as_factor(Direction),
    col = as_factor(Direction)
  )) +
  geom_point(alpha = 0.7) +
  geom_line() +
  scale_color_viridis_d(option = "plasma", end = .7) +
  labs(
    x = "Day",
    y = "Total Count",
    col = "Direction",
    title = "December 2022"
  )