#### Paper title: Polls of fear? Electoral violence, incumbent strength, and voter turnout in Côte d'Ivoire
#### Purpose: Descriptive analysis
#### Author: Sebastian van Baalen
#### Date: 2023-01-04

#### Read this first ####

# If the script contains errors or fails to run, compare the package version
# with the version listed in the run-file.

#### Front matter ####

# Install packages

# Un-comment to install packages

#install.packages("here") # For using relative paths
#install.packages("ggplot2") # For making graphs and figures
#install.packages("tidyverse") # For working with tidy data
#install.packages("sf") # For working with spatial data
#install.packages("viridis") # For using the Viridis color scales

# Load packages

library(here) # For using relative paths
## here() starts at /Users/sebva935/Documents/Datasets and scripts/Replication files/JPR 2022
library(ggplot2) # For making graphs and figures
library(tidyverse) # For working with tidy data
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ✔ purrr   0.3.5      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(sf) # For working with spatial data
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(viridis) # For using the Viridis color scales
## Loading required package: viridisLite
# Load electoral violence dataset

load("ev.Rdata")

# Set theme

my_theme <- 
  theme_bw() +
  theme(
    axis.title.y = element_text(margin = margin (t = 0, r = 10, b = 0, l = 0), size = 16),
    axis.title.x = element_text(margin = margin (t = 10, r = 0, b = 0, l = 0), size = 16),
    axis.text.y = element_text(size = 14),
    axis.text.x = element_text(size = 12)
  )

#### Data limitations and reporting ####

# Create source variables

ev <- ev %>%
  mutate(
    MultipleSources = case_when(
      grepl(";", Source) ~ "Multiple.Sources",
      TRUE ~ "Single.Source"
    ),
    ACLED = case_when(
      grepl("ACLED", Source) ~ 1,
      TRUE ~ 0
    ),
    CNDH = case_when(
      grepl("CNDH", Source) ~ 1,
      TRUE ~ 0
    ),
    ACLEDCNDH = case_when(
      grepl("ACLED", Source) ~ 1,
      grepl("CNDH", Source) ~ 1,
      TRUE ~ 0
    ),
  ) %>%
  separate(col = EventName, into = c("EventName1", "EventName2"), sep = ";", remove = FALSE)
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 301 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
##### Number and share of events reported by multiple sources ####

ev %>% 
  group_by(MultipleSources) %>% 
  summarise(Frequency = n()) %>% 
  ungroup %>% 
  mutate('Share of events (%)' = Frequency/sum(Frequency) * 100)
## # A tibble: 2 × 3
##   MultipleSources  Frequency `Share of events (%)`
##   <chr>                <int>                 <dbl>
## 1 Multiple.Sources        49                  15.3
## 2 Single.Source          271                  84.7
##### Number and share of events reported by multiple sources by event type ####

ev %>% 
  group_by(EventName1, MultipleSources) %>% 
  summarise(Frequency = n()) %>% 
  pivot_wider(names_from = MultipleSources, values_from = Frequency) %>% 
  ungroup %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate('Share of events (%)' = Multiple.Sources/(Multiple.Sources + Single.Source) * 100)
## `summarise()` has grouped output by 'EventName1'. You can override using the
## `.groups` argument.
## # A tibble: 16 × 4
##    EventName1                        Multiple.Sources Single.Source Share of e…¹
##    <chr>                                        <int>         <int>        <dbl>
##  1 Arrest                                           1             4        20   
##  2 Arson                                            0            20         0   
##  3 Attack                                           8            20        28.6 
##  4 Clash                                           24            38        38.7 
##  5 Intimidation                                     0            11         0   
##  6 Kidnapping                                       0             1         0   
##  7 Killing                                          0             1         0   
##  8 Looting                                          1            56         1.75
##  9 Protest                                          1             0       100   
## 10 Protest with intervention                        2             6        25   
## 11 Riot                                             0             1         0   
## 12 Roadblocks                                       2            26         7.14
## 13 Shooting                                         0             1         0   
## 14 Unrest                                           0            10         0   
## 15 Violent protest                                  2            50         3.85
## 16 Violent protest with intervention                8            26        23.5 
## # … with abbreviated variable name ¹​`Share of events (%)`
##### Number and share of events reported by multiple sources by bodily injury ####

ev %>% 
  mutate(
    Casualties = case_when(
      ParticipantDeathsHigh > 0 ~ "Casualties",
      ParticipantInjuriesHigh > 0 ~ "Casualties",
      TRUE ~ "No.Casualties"
    )
  ) %>% 
  group_by(Casualties, MultipleSources) %>% 
  summarise(Frequency = n()) %>% 
  pivot_wider(names_from = MultipleSources, values_from = Frequency) %>% 
  ungroup %>% 
  mutate_if(is.numeric, ~replace_na(., 0)) %>% 
  mutate('Share of events (%)' = Multiple.Sources/(Multiple.Sources + Single.Source) * 100)
## `summarise()` has grouped output by 'Casualties'. You can override using the
## `.groups` argument.
## # A tibble: 2 × 4
##   Casualties    Multiple.Sources Single.Source `Share of events (%)`
##   <chr>                    <int>         <int>                 <dbl>
## 1 Casualties                  30            51                 37.0 
## 2 No.Casualties               19           220                  7.95
##### Number of events by timing ####

ev %>%
  mutate(
    Date = as.Date(Date, format =  "%Y-%m-%d"),
    Timing = case_when(
      Date < "2020-10-31" ~ "Pre-election",
      Date == "2020-10-31" ~ "Presidential election day",
      Date > "2020-10-31" & Date < "2021-03-06" ~ "Interim period",
      Date == "2021-03-06" ~ "Legislative election day"
    )
  ) %>%
  group_by(Timing) %>%
  summarise(Total = n())
## # A tibble: 4 × 2
##   Timing                    Total
##   <chr>                     <int>
## 1 Interim period               53
## 2 Legislative election day     14
## 3 Pre-election                155
## 4 Presidential election day    98
#### Visualizations ####

##### Bar graph of number of events by type ####

# Note that this graph focuses on how (through what type of action) the event started

ev %>%
  ggplot() +
  geom_bar(aes(x = EventName1), fill = "grey68") +
  scale_y_continuous(breaks = seq(0, 60, 10)) +
  labs(x = NULL, y = "Number of events") +
  my_theme +
  theme(axis.text.x = element_text(hjust = 1, size = 12, angle = 45))

ggsave("event_type.png", width = 14, height = 8.9)

# Number of deaths and injuries by event type

ev %>%
  group_by(EventName1) %>%
  summarise(
    Deaths = sum(ParticipantDeaths),
    Injured = sum(ParticipantInjuries)
  ) %>%
  select(EventName1, Deaths, Injured)
## # A tibble: 16 × 3
##    EventName1                        Deaths Injured
##    <chr>                              <int>   <int>
##  1 Arrest                                 0       0
##  2 Arson                                  0       0
##  3 Attack                                12      34
##  4 Clash                                 71     583
##  5 Intimidation                           0       3
##  6 Kidnapping                             0       0
##  7 Killing                                1       0
##  8 Looting                                0       1
##  9 Protest                                0       3
## 10 Protest with intervention              3       5
## 11 Riot                                   0       0
## 12 Roadblocks                             0       3
## 13 Shooting                               0       0
## 14 Unrest                                 0       0
## 15 Violent protest                        0       3
## 16 Violent protest with intervention      4      47
# Total number of deaths and injuries

ev %>% summarise(Deaths = sum(ParticipantDeaths), Injured = sum(ParticipantInjuries))
##   Deaths Injured
## 1     91     682
##### Bar graph of the number of casualties by type of violence ####

ev %>%
  separate(col = EventName, into = c("EventName1", "EventName2"), sep = ";") %>%
  group_by(EventName1) %>%
  summarise(
    Casualties = sum(ParticipantDeaths, ParticipantInjuries),
    CasualtiesHigh = sum(ParticipantDeathsHigh, ParticipantInjuriesHigh),
    CasualtiesLow = sum(ParticipantDeathsLow, ParticipantInjuriesLow)
  ) %>%
  ggplot() +
  geom_bar(aes(x = EventName1, y = Casualties), stat = "identity", fill = "grey68") +
  geom_errorbar(aes(x = EventName1, ymin = CasualtiesLow, ymax = CasualtiesHigh), stat = "identity", linewidth = .8, width = 0.2) +
  scale_y_continuous(breaks = seq(0, 700, 100)) +
  labs(x = NULL, y = "Number of casualties") +
  my_theme +
  theme(axis.text.x = element_text(hjust = 1, size = 12, angle = 45))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 301 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].

ggsave("event_type_casualties.png", width = 14, height = 8.9)
  
##### Bar graph of share of events by involvement ####

positions <- c("Not reported", "Only opposition", "Only government", "Both opposition and government")

ev %>%
  mutate(
    Involvement = case_when(
      Actor1Side == 1 & Actor2Side == -99 ~ "Only opposition",
      Actor1Side == 1 & is.na(Actor2Side) ~ "Only opposition",
      Actor1Side == 1 & Actor2Side == 1 ~ "Only opposition",
      Actor1Side == -99 & Actor2Side == 0 ~ "Only government",
      Actor1Side == 0 & Actor2Side == 0 ~ "Only government",
      Actor1Side == 0 & is.na(Actor2Side) ~ "Only government",
      Actor1Side == 1 & Actor2Side == 0 ~ "Both opposition and government",
      Actor1Side == 0 & Actor2Side == 1 ~ "Both opposition and government",
      TRUE ~ "Not reported"
    )
  ) %>%
  select(Actor1Side, Actor2Side, Involvement) %>%
  ggplot() +
  geom_bar(aes(y = (after_stat(count))/sum(after_stat(count)), x = Involvement), fill = "grey68") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), breaks = seq(0, 0.5, 0.1), limits = c(0, 0.5)) +
  scale_x_discrete(limits = positions) +
  labs(x = NULL, y = "Share of events") +
  my_theme

ggsave("involvement.png", width = 12, height = 8)
    
##### Bar graph of the share of events by initiator ####

ev <- ev %>%
  mutate(
    Initiator = case_when(
      Actor1Side == 1 & EventDirection == 0 ~ "Opposition-initiated",
      Actor1Side == 1 & ViolenceInitiator == "0,1" ~ "Opposition-initiated",
      Actor1Side == 0 & EventDirection == 0 ~ "Government-initiated",
      Actor1Side == 0 & ViolenceInitiator == "0,1" ~ "Government-initiated",
      Actor2Side == 0 & ViolenceInitiator == "0,2" ~ "Government-initiated",
      TRUE ~ "Initiator not reported"
    )
  )

table(ev$Actor1Name[ev$Initiator == "Initiator not reported"])
## 
##                                    A group of men 
##                                                 1 
## Ethnic community supporting the opposition (PDCI) 
##                                                 1 
##                             Government supporters 
##                                                 1 
##                                Machette-armed man 
##                                                 1 
##                                          Microbes 
##                                                 1 
##                             Opposition supporters 
##                                                 2 
##                   Opposition supporters (assumed) 
##                                                 8 
##        Opposition supporters (FPI-Laurent Gbagbo) 
##                                                 1 
##                       Opposition supporters (FPI) 
##                                                 1 
##            Opposition supporters (GPS, FPI, PDCI) 
##                                                 1 
##                                         Residents 
##                                                 1 
##                    Unidentified armed individuals 
##                                                11 
##                         Unidentified armed youths 
##                                                 1 
##                          Unidentified individuals 
##                                                94 
##                                  Unidentified men 
##                                                14 
##                                Unidentified youth 
##                                                 9
# Order categories in chart

positions <- c("Opposition-initiated", "Government-initiated", "Initiator not reported")

ev %>%
  ggplot() +
  geom_bar(aes(y = (..count..)/sum(..count..), x = Initiator), fill = "grey68") +
  scale_x_discrete(limits = positions) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), breaks = seq(0, 0.5, 0.1), limits = c(0, 0.5)) +
  labs(x = NULL, y = "Share of events") +
  my_theme
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.

ggsave("initiator.png", width = 12, height = 8)


##### Bar graph of the number of events by type and timing ####

ev %>%
  mutate(
    Date = as.Date(Date, format =  "%Y-%m-%d"),
    Timing = case_when(
      Date < "2020-10-31" ~ "Pre-election",
      Date == "2020-10-31" ~ "Presidential election day",
      Date > "2020-10-31" & Date < "2021-03-06" ~ "Interim period",
      Date == "2021-03-06" ~ "Legislative election day"
    )
  ) %>%
  ggplot() +
  geom_bar(aes(x = EventName1), fill = "grey68") +
  facet_wrap(~ factor(Timing, levels = c("Pre-election", "Presidential election day", "Interim period", "Legislative election day"))) +
  labs(x = NULL, y = "Number of events") +
  my_theme +
  theme(
    axis.text.x = element_text(hjust = 1, size = 12, angle = 45),
    strip.text.x = element_text(size = 14)
  )

ggsave("type_and_timing.png", width = 16, height = 12)

##### Number of electoral violence events by voting district ####

load("df.Rdata")
  
# Load shapefile

shapefile <- st_read("shapefile.shp")
## Reading layer `shapefile' from data source 
##   `/Users/sebva935/Documents/Datasets and scripts/Replication files/JPR 2022/shapefile.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 205 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -8.584147 ymin: 4.360553 xmax: -2.492309 ymax: 10.74565
## Geodetic CRS:  WGS 84
# Merge electoral results and shapefile

shapefile <- shapefile %>% rename(Vote.District = Legislativ)

df <- shapefile %>%
  left_join(df, by = "Vote.District")

# Make electoral violence dataset spatial to enable geo-processing

ev <- as.data.frame(ev) %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)

# Create electoral violence variable

df <- df %>% 
  mutate(
    Electoral.Violence.Events = lengths(st_intersects(x = df, y = ev))
  )

# Make map

df %>%
  ggplot() +
  geom_sf(
    aes(
      fill = Electoral.Violence.Events
    ),
    color = "white",
    linewidth = 0.1,
  ) +
  coord_sf(
    xlim = c(-9, -2),
    ylim = c(4, 11),
    expand = FALSE
  ) +
  scale_fill_viridis(
    name = "Number of events",
    option = "viridis",
    discrete = FALSE,
    begin = 0,
    end = 1,
    alpha = 0.8,
    direction = 1,
    na.value = "white"
  ) +
  annotate(
    "text",
    x = -3.55,
    y = 9.1,
    label = "Bouna\nNational\nPark",
    size = 3.5,
    lineheight = 1,
    hjust = "center"
  ) +
  theme_void()

ggsave("map.png", width = 10, height = 8.9)

##### Number of electoral violence events by voting district in Abidjan ####

df %>%
  mutate(Sub.Prefecture = toupper(Sub.Prefecture)) %>%
  filter(Region == "District Autonome D'Abidjan") %>%
  mutate(
    Sub.Prefecture1 = case_when(
      Sub.Prefecture != "MARCORY" ~ Sub.Prefecture
    ),
    Sub.Prefecture2 = case_when(
      Sub.Prefecture == "MARCORY" ~ "MARCORY"
    )
  ) %>%
  ggplot() +
  geom_sf(
    aes(
      fill = Electoral.Violence.Events
    ),
    color = "white",
    size = 0.1,
  ) +
  coord_sf(
    xlim = c(-4.35, -3.75),
    ylim = c(5.2, 5.55),
    expand = FALSE
  ) +
  scale_fill_viridis(
    name = "Number of events",
    option = "viridis",
    discrete = FALSE,
    begin = 0,
    end = 1,
    alpha = 0.8,
    direction = 1,
    na.value = "white"
  ) +
  geom_sf_label(aes(label = Sub.Prefecture1), size = 3) +
  geom_sf_label(aes(label = Sub.Prefecture2), size = 3, nudge_y = 0.013) +
  theme_void()
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning: Removed 1 rows containing missing values (`geom_label()`).
## Warning: Removed 12 rows containing missing values (`geom_label()`).

ggsave("map_abidjan.png", width = 10, height = 6)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
## Warning: Removed 1 rows containing missing values (`geom_label()`).
## Warning: Removed 12 rows containing missing values (`geom_label()`).
##### Bar graph of the number of events by strongholds (2010) ####

df %>%
  ggplot() +
  geom_bar(aes(x = Winner.Name.2010, y = Electoral.Violence.Events/sum(Electoral.Violence.Events)), stat = "identity", fill = "grey68") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  labs(x = NULL, y = "Share of events") +
  my_theme

ggsave("strongholds2010.png", width = 12, height = 8)

##### Bar graph of the number of events by strongholds (2021) ####

df %>%
  mutate(
    Stronghold2021 = case_when(
      Winner.Name.2021 == "RHDP" ~ "Incumbent stronghold (RHDP)",
      Winner.Name.2021 == "Independent" ~ "Independents",
      TRUE ~ "Opposition stronghold"
    ),
  ) %>%
  group_by(Stronghold2021) %>%
  summarise(Events = sum(Electoral.Violence.Events)) %>%
  ggplot() +
  geom_bar(aes(x = Stronghold2021, y = Events/sum(Events)), stat = "identity", fill = "grey68") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 5L), breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  labs(x = NULL, y = "Share of events") +
  my_theme

ggsave("strongholds2021.png", width = 12, height = 8)