Comparing weekdays to weekend.
In this document, we carry out an exploratory data analysis to investigate the parallel evolution of road traffic and nitrogen dioxide (NO\(_{2}\)) concentrations in Marseille. We focus on NO\(_{2}\) as it is locally emitted by cars. We carry out a simple analysis where compare the concentration of NO\(_{2}\) between weekdays and weekends as road traffic tends to decrease on Saturday and Sunday. The weekend/weedays contrast can be seen as a form of arbitrary variation in road traffic. We do not check the effects on other pollutants because they are less emitted locally.
In the following sections, we proceed as follows:
Should you have any questions, need help to reproduce the analysis or find coding errors, please do not hesitate to contact us at leo.zabrocki@gmail.com and marion.leroutier@hhs.se.
We load the following packages:
We load our custom ggplot2
theme for graphs:
First, we load the daily data:
We select relevant variables for our analysis:
data <- data %>%
select(
# date variable
"date",
# pollutants
"mean_no2_l",
"mean_no2_sl",
# maritime traffic variables
"total_gross_tonnage_cruise",
# weather parameters
"temperature_average",
"rainfall_height_dummy",
"humidity_average",
"wind_speed",
"wind_direction_categories",
# road traffic variables
"road_traffic_flow_all",
"road_occupancy_rate",
# calendar indicators
"weekday",
"weekend",
"holidays_dummy",
"bank_day_dummy",
"month",
"year"
)
We explore the patterns of road traffic. It is important to keep in mind that:
We first plot the average road traffic flow and occupancy rate by day of the week :
# make the graph
graph_road_traffic_wd <- data %>%
select(weekday, road_traffic_flow_all, road_occupancy_rate) %>%
pivot_longer(cols = -c(weekday), names_to = "traffic_variable", values_to = "value") %>%
mutate(traffic_variable = ifelse(traffic_variable == "road_traffic_flow_all", "Flow of Vehicles (N)", "Occupancy Rate (%)")) %>%
group_by(weekday, traffic_variable) %>%
summarise(mean_value = mean(value, na.rm = TRUE)) %>%
ggplot(., aes(x = weekday, y = mean_value)) +
geom_segment(aes(x = weekday, xend = weekday, y = 0, yend = mean_value)) +
geom_point(shape = 21, size = 4, colour = "black", fill = my_blue) +
facet_wrap(~ traffic_variable, scales = "free_y") +
xlab("Day of the Week") + ylab("Hourly Average") +
theme_tufte()
# we print the graph
graph_road_traffic_wd
# save the graph
ggsave(
graph_road_traffic_wd,
filename = here::here(
"inputs",
"3.outputs",
"2.daily_analysis",
"2.analysis_pollution",
"2.road_traffic",
"graph_road_traffic_wd.pdf"
),
width = 30,
height = 15,
units = "cm",
device = cairo_pdf
)
We then plot the empirical cumulative distribution of road traffic by weekdays and weekend:
graph_ecdf_road_traffic_we <- data %>%
select(weekend, road_traffic_flow_all, road_occupancy_rate) %>%
pivot_longer(cols = -c(weekend), names_to = "traffic_variable", values_to = "value") %>%
mutate(traffic_variable = ifelse(traffic_variable == "road_traffic_flow_all", "Flow of Vehicles (N)", "Occupancy Rate (%)")) %>%
mutate(weekend = ifelse(weekend == 1, "Weekend", "Weekday")) %>%
ggplot(., aes(x = value, colour = weekend)) +
stat_ecdf(size = 1.1) +
scale_color_manual(values = c(my_blue, my_orange)) +
facet_wrap(~ traffic_variable, scales = "free_x") +
ylab("Cumulative Probability") + xlab("Road Traffic Value") +
labs(colour = "Status:") +
theme_tufte()
# we print the graph
graph_ecdf_road_traffic_we
# save the graph
ggsave(
graph_ecdf_road_traffic_we,
filename = here::here(
"inputs",
"3.outputs",
"2.daily_analysis",
"2.analysis_pollution",
"2.road_traffic",
"graph_ecdf_road_traffic_we.pdf"
),
width = 30,
height = 18,
units = "cm",
device = cairo_pdf
)
We plot the average NO2 concentration by day of the week:
# make the graph
graph_no2_wd <- data %>%
select(weekday, mean_no2_l, mean_no2_sl) %>%
rename("Longchamp Station" = mean_no2_l,
"Saint-Louis Station" = mean_no2_sl) %>%
pivot_longer(cols = -c(weekday),
names_to = "station",
values_to = "concentration") %>%
group_by(weekday, station) %>%
summarise(mean_no2 = mean(concentration, na.rm = TRUE)) %>%
ggplot(., aes(x = weekday, y = mean_no2)) +
geom_segment(aes(
x = weekday,
xend = weekday,
y = 0,
yend = mean_no2
)) +
geom_point(
shape = 21,
size = 4,
colour = "black",
fill = my_blue
) +
facet_wrap( ~ station, ncol = 1) +
xlab("Day of the Week") + ylab("Average NO2 Concentrations (µg/m³)") +
theme_tufte()
# we print the graph
graph_no2_wd
# save the graph
ggsave(
graph_no2_wd,
filename = here::here(
"inputs",
"3.outputs",
"2.daily_analysis",
"2.analysis_pollution",
"2.road_traffic",
"graph_no2_wd.pdf"
),
width = 30,
height = 25,
units = "cm",
device = cairo_pdf
)
We also plot the empirical cumulative distribution of NO\(_{2}\) concentration by weekdays and weekend:
# make the graph
graph_ecdf_no2_we <- data %>%
select(weekend, mean_no2_l, mean_no2_sl) %>%
mutate(weekend = ifelse(weekend == 1, "Weekend", "Weekday")) %>%
rename("Longchamp Station" = mean_no2_l,
"Saint-Louis Station" = mean_no2_sl) %>%
pivot_longer(cols = -c(weekend),
names_to = "station",
values_to = "concentration") %>%
ggplot(., aes(x = concentration, colour = weekend)) +
stat_ecdf(size = 1.1) +
scale_color_manual(values = c(my_blue, my_orange)) +
facet_wrap( ~ station) +
ylab("Cumulative Probability") + xlab("NO2 Concentrations (µg/m³)") +
labs(colour = "Status:") +
theme_tufte() +
theme(
legend.position = "top",
legend.justification = "left",
legend.direction = "horizontal"
)
# we print the graph
graph_ecdf_no2_we
# save the graph
ggsave(
graph_ecdf_no2_we,
filename = here::here(
"inputs",
"3.outputs",
"2.daily_analysis",
"2.analysis_pollution",
"2.road_traffic",
"graph_ecdf_no2_we.pdf"
),
width = 30,
height = 15,
units = "cm",
device = cairo_pdf
)
We plot the average total gross tonnage of vessels by day of the week:
graph_gross_tonnage_cruise_wd <- data %>%
select(weekday, total_gross_tonnage_cruise) %>%
group_by(weekday) %>%
summarise(total_gross_tonnage_cruise = mean(total_gross_tonnage_cruise, na.rm = TRUE)) %>%
ggplot(., aes(x = weekday, y = total_gross_tonnage_cruise)) +
geom_segment(aes(
x = weekday,
xend = weekday,
y = 0,
yend = total_gross_tonnage_cruise
)) +
geom_point(
shape = 21,
size = 4,
colour = "black",
fill = my_blue
) +
scale_y_continuous(
breaks = scales::pretty_breaks(n = 5),
labels = function(x)
format(x, big.mark = " ", scientific = FALSE)
) +
xlab("Day of the Week") + ylab("Average Total Gross Tonnage") +
theme_tufte()
# we print the graph
graph_gross_tonnage_cruise_wd
# save the graph
ggsave(
graph_gross_tonnage_cruise_wd,
filename = here::here(
"inputs",
"3.outputs",
"2.daily_analysis",
"2.analysis_pollution",
"2.road_traffic",
"graph_gross_tonnage_cruise_wd.pdf"
),
width = 30,
height = 15,
units = "cm",
device = cairo_pdf
)
We can check that the weekends and weekdays are similar for weather covariates. For continuous weather covariates, we draw boxplots for treated and control groups:
# we select control variables and store them in a long dataframe
data_weather_continuous_variables <- data %>%
mutate(weekend = ifelse(weekend == 1, "Weekend", "Weekday")) %>%
select(temperature_average,
humidity_average,
wind_speed,
weekend) %>%
pivot_longer(cols = -c(weekend),
names_to = "variable",
values_to = "values") %>%
mutate(
variable = case_when(
variable == "temperature_average" ~ "Average Temperature (°C)",
variable == "humidity_average" ~ "Humidity Average (%)",
variable == "wind_speed" ~ "Wind Speed (m/s)"
)
)
graph_boxplot_continuous_weather <-
ggplot(data_weather_continuous_variables,
aes(x = weekend, y = values, colour = weekend)) +
geom_violin(size = 1.2) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
scale_color_manual(values = c(my_blue, my_orange)) +
ylab("Covariate Value") +
xlab("") +
labs(colour = "Units Status:") +
facet_wrap( ~ variable, scale = "free", ncol = 3) +
theme_tufte() +
theme(
legend.position = "top",
legend.justification = "left",
legend.direction = "horizontal"
)
# we print the graph
graph_boxplot_continuous_weather
# save the graph
ggsave(
graph_boxplot_continuous_weather,
filename = here::here(
"inputs",
"3.outputs",
"2.daily_analysis",
"2.analysis_pollution",
"2.road_traffic",
"graph_boxplot_continuous_weather.pdf"
),
width = 40,
height = 15,
units = "cm",
device = cairo_pdf
)
For the rainfall dummy and the wind direction categories, we plot the proportions:
# we select the rainfall variables
data_weather_categorical <- data %>%
mutate(weekend = ifelse(weekend == 1, "Weekend", "Weekday")) %>%
select(rainfall_height_dummy,
wind_direction_categories,
weekend) %>%
drop_na() %>%
mutate_at(vars(rainfall_height_dummy), ~ ifelse(. == 1, "True", "False")) %>%
mutate_all(~ as.character(.)) %>%
pivot_longer(cols = -c(weekend),
names_to = "variable",
values_to = "values") %>%
# group by weekend, variable and values
group_by(weekend, variable, values) %>%
# compute the number of observations
summarise(n = n()) %>%
# compute the proportion
mutate(freq = round(n / sum(n) * 100, 0)) %>%
ungroup() %>%
mutate(
variable = case_when(
variable == "wind_direction_categories" ~ "Wind Direction",
variable == "rainfall_height_dummy" ~ "Rainfall Dummy"
)
)
# build the graph
graph_categorical_weather <-
ggplot(data_weather_categorical, aes(x = freq, y = values, fill = weekend)) +
geom_point(shape = 21,
size = 6,
alpha = 0.8) +
scale_fill_manual(values = c(my_blue, my_orange)) +
facet_wrap(~ variable, scales = "free") +
xlab("Proportion (%)") +
ylab("") +
labs(fill = "Units Status:") +
theme_tufte() +
theme(
legend.position = "top",
legend.justification = "left",
legend.direction = "horizontal"
)
# we print the graph
graph_categorical_weather
# save the graph
ggsave(
graph_categorical_weather,
filename = here::here(
"inputs",
"3.outputs",
"2.daily_analysis",
"2.analysis_pollution",
"2.road_traffic",
"graph_categorical_weather.pdf"
),
width = 40,
height = 20,
units = "cm",
device = cairo_pdf
)
We calculate the average differences in road traffic and NO\(_{2}\) concentrations between weekends and weekdays.
We compute the average difference in the flow of vehicles and road occupancy rate between weekdays and weekends:
# compute difference in flow
diff_flow <- data %>%
filter(!is.na(road_traffic_flow_all)) %>%
summarise(average_difference = mean(road_traffic_flow_all[weekend == 1]) - mean(road_traffic_flow_all[weekend == 0]))
# compute difference in occupancy rate
diff_occupancy <- data %>%
filter(!is.na(road_occupancy_rate)) %>%
summarise(average_difference = mean(road_occupancy_rate[weekend == 1]) - mean(road_occupancy_rate[weekend == 0]))
On average, the hourly road traffic decreases by 484 vehicles on weekends. There is a drop of -3.9 poins in the occupancy rate.
We compute the average effect of weekends on NO2 concentrations.
# compute mean difference in no2 for the two stations
diff_no2 <- data %>%
select(weekend, mean_no2_l, mean_no2_sl) %>%
pivot_longer(cols = -c(weekend),
names_to = "station",
values_to = "concentration") %>%
group_by(station) %>%
summarise(average_difference = round(mean(concentration[weekend == 1], na.rm = TRUE) - mean(concentration[weekend == 0], na.rm = TRUE), 1))
On average, NO\(_{2}\) is lower between 7.5 and 8.2 \(\mu g/m^{3}\) on weekends compared to weekdays, depending on the monitoring station.
If you see mistakes or want to suggest changes, please create an issue on the source repository.