Comparing hours with entering cruise traffic to hours without. Adjusting for calendar and weather indicators.
In this document, we take great care providing all steps and R codes required to check the intervention we set up in our matching procedure. We compare hours where:
We adjust for calendar calendar indicator and weather confounding factors.
Should you have any questions, need help to reproduce the analysis or find coding errors, please do not hesitate to contact us at leo.zabrocki@gmail.com and marion.leroutier@hhs.se.
We load the following packages:
# load required packages
library(knitr) # for creating the R Markdown document
library(here) # for files paths organization
library(tidyverse) # for data manipulation and visualization
library(ggridges) # for ridge density plots
library(Cairo) # for printing custom police of graphs
library(patchwork) # combining plots
We finally load our custom ggplot2 theme for graphs:
We load the matched data:
We compute the difference in the cruise entering total gross tonnage for each pair:
# compute the difference in tonnage by pair
pair_difference_tonnage_t <- data_matched %>%
select(total_gross_tonnage_entry_cruise, is_treated, pair_number) %>%
arrange(pair_number, is_treated) %>%
select(-is_treated) %>%
group_by(pair_number) %>%
summarise(tonnage_difference = total_gross_tonnage_entry_cruise[2] - total_gross_tonnage_entry_cruise[1])
We find on average, a 6.6007^{4} difference in gross tonnage between treated and control units. Below is the distribution of the pair difference in hourly gross tonnage in t:
# plot the graph
graph_tonnage_difference_density <-
ggplot(pair_difference_tonnage_t, aes(x = tonnage_difference)) +
geom_density(
colour = my_blue,
size = 1.1,
alpha = 0.8
) +
geom_vline(
xintercept = mean(pair_difference_tonnage_t$tonnage_difference),
size = 1.1,
color = "coral") +
scale_x_continuous(
breaks = scales::pretty_breaks(n = 8),
labels = function(x)
format(x, big.mark = " ", scientific = FALSE)
) +
xlab("Pair Difference in Tonnage") + ylab("Density") +
theme_tufte()
# we print the graph
graph_tonnage_difference_density
# save the graph
ggsave(
graph_tonnage_difference_density,
filename = here::here(
"inputs",
"3.outputs",
"1.hourly_analysis",
"2.experiment_cruise",
"1.checking_matching_procedure",
"graph_tonnage_difference_entering_cruise_tonnage_density.pdf"
),
width = 30,
height = 20,
units = "cm",
device = cairo_pdf
)
To check which hypothetical intervention we study, we plot below the average tonnage for each hour and for treated and control groups :
# compute mean tonnage for each day
mean_tonnage_group_hour <- data_matched %>%
# select relevant variables
select(pair_number,
is_treated,
contains("total_gross_tonnage_entry_cruise")) %>%
# transform data in long format
pivot_longer(
cols = -c(pair_number, is_treated),
names_to = "variable",
values_to = "tonnage"
) %>%
# create the day variable
mutate(
time = 0 %>%
ifelse(str_detect(variable, "lag_1"), -1, .) %>%
ifelse(str_detect(variable, "lag_2"), -2, .) %>%
ifelse(str_detect(variable, "lag_3"), -3, .) %>%
ifelse(str_detect(variable, "lead_1"), 1, .) %>%
ifelse(str_detect(variable, "lead_2"), 2, .) %>%
ifelse(str_detect(variable, "lead_3"), 3, .)
) %>%
# rename the labels of the is_treated dummy
mutate(is_treated = ifelse(is_treated == TRUE, "Treated", "Control")) %>%
# compute the mean tonnage for each day and pollutant
group_by(variable, is_treated, time) %>%
summarise(tonnage = mean(tonnage))
# plot the graph
graph_mean_tonnage_group_hour <-
ggplot(
mean_tonnage_group_hour,
aes(
x = as.factor(time),
y = tonnage,
group = is_treated,
colour = is_treated,
fill = is_treated
)
) +
geom_segment(
x = 4,
y = 0,
xend = 4,
yend = 65667,
lineend = "round",
# See available arrow types in example above
linejoin = "round",
size = 0.5,
colour = "black"
) +
geom_line() +
geom_point(shape = 21,
size = 5,
colour = "black") +
scale_colour_manual(values = c(my_blue, my_orange)) +
scale_fill_manual(values = c(my_blue, my_orange)) +
scale_y_continuous(
breaks = scales::pretty_breaks(n = 10),
labels = function(x)
format(x, big.mark = " ", scientific = FALSE)
) +
labs(fill = "Group:") +
xlab("Hour") + ylab("Hourly Gross Tonnage") +
theme_tufte() +
theme(
axis.title.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 20),
legend.text = element_text(size = 18)
) +
guides(color = FALSE)
# we print the graph
graph_mean_tonnage_group_hour
# save the graph
ggsave(
graph_mean_tonnage_group_hour,
filename = here::here(
"inputs",
"3.outputs",
"1.hourly_analysis",
"2.experiment_cruise",
"1.checking_matching_procedure",
"graph_mean_tonnage_group_hour_entering_cruise.pdf"
),
width = 40,
height = 20,
units = "cm",
device = cairo_pdf
)
We also check how the difference in tonnage for other vessels’ types between treated and control units evolves:
# we create a table with the tonnage for each pair,
# for each vessel type,
# and for -6 hours to + 6 hours
data_vessel_type_tonnage <- data_matched %>%
# relabel treatment indicator
mutate(is_treated = ifelse(is_treated == TRUE, "treated", "control")) %>%
# select relevant variables
select(
pair_number,
is_treated,
contains("total_gross_tonnage_entry_cruise"),
contains("total_gross_tonnage_exit_cruise"),
contains("total_gross_tonnage_entry_ferry"),
contains("total_gross_tonnage_exit_ferry"),
contains("total_gross_tonnage_entry_other_vessels"),
contains("total_gross_tonnage_exit_other_vessels")
) %>%
# transform data in long format
pivot_longer(
cols = -c(pair_number, is_treated),
names_to = "variable",
values_to = "tonnage"
) %>%
# create vessel type variable
mutate(
vessel_type = NA %>%
ifelse(str_detect(variable, "cruise"), "Cruise", .) %>%
ifelse(str_detect(variable, "ferry"), "Ferry", .) %>%
ifelse(str_detect(variable, "other_vessels"), "Other Vessels", .)
) %>%
# create the day variable
mutate(entry_exit = NA %>%
ifelse(str_detect(variable, "entry"), "Arrival", .) %>%
ifelse(str_detect(variable, "exit"), "Departure", .)) %>%
mutate(
time = 0 %>%
ifelse(str_detect(variable, "lag_1"),-1, .) %>%
ifelse(str_detect(variable, "lag_2"),-2, .) %>%
ifelse(str_detect(variable, "lag_3"),-3, .) %>%
ifelse(str_detect(variable, "lead_1"), 1, .) %>%
ifelse(str_detect(variable, "lead_2"), 2, .) %>%
ifelse(str_detect(variable, "lead_3"), 3, .)
) %>%
select(pair_number, vessel_type, is_treated, entry_exit, time, tonnage) %>%
pivot_wider(names_from = is_treated, values_from = tonnage)
# compute the average difference in traffic between treated and control units
data_mean_difference <- data_vessel_type_tonnage %>%
mutate(difference = treated - control) %>%
select(-c(treated, control)) %>%
group_by(vessel_type, entry_exit, time) %>%
summarise(mean_difference = mean(difference)) %>%
ungroup()
# plot the evolution
graph_tonnage_difference_vessel_type <-
ggplot(data_mean_difference,
aes(x = as.factor(time), y = mean_difference, group = "l")) +
geom_hline(yintercept = 0) +
geom_segment(aes(
x = as.factor(time),
xend = as.factor(time),
y = 0,
yend = mean_difference
)) +
geom_point(
shape = 21,
size = 5,
colour = "black",
fill = my_blue
) +
scale_y_continuous(
breaks = scales::pretty_breaks(n = 5),
labels = function(x)
format(x, big.mark = " ", scientific = FALSE)
) +
facet_grid(vessel_type ~ entry_exit) +
xlab("Hour") + ylab("Average Difference in\n Gross Tonnage") +
theme_tufte() +
theme(
axis.title.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 20),
legend.text = element_text(size = 18)
)
# we print the graph
graph_tonnage_difference_vessel_type
# save the graph
ggsave(
graph_tonnage_difference_vessel_type,
filename = here::here(
"inputs",
"3.outputs",
"1.hourly_analysis",
"2.experiment_cruise",
"1.checking_matching_procedure",
"graph_tonnage_difference_vessel_type.pdf"
),
width = 50,
height = 36,
units = "cm",
device = cairo_pdf
)
We combine the two previous plots:
# combine plots
graph_hourly_intervention <-
graph_mean_tonnage_group_hour / graph_tonnage_difference_vessel_type +
plot_layout(heights = c(1, 1.5)) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 30, face = "bold"))
# save the plot
ggsave(
graph_hourly_intervention,
filename = here::here(
"inputs",
"3.outputs",
"1.hourly_analysis",
"2.experiment_cruise",
"1.checking_matching_procedure",
"graph_hourly_intervention.pdf"
),
width = 30,
height = 30,
units = "cm",
device = cairo_pdf
)
If you see mistakes or want to suggest changes, please create an issue on the source repository.