Stop & Search Police Data Visualisation

The raw data on “Stop and Search” comes from the following website: https://data.police.uk/data/

I will analyse the following data: date range: September 2020; force: Metropolitan Police Service; with the “Include stop and search data” option selected.

Based on the “Stop and Search” dataset, I will prepare three visualisations, keeping best visualisation practices in mind.

Loading Data

# Read the September 2020 Metropolitan Police stop-and-search extract
# (empty strings are treated as NA), pass the column names through
# clean_names(), and keep columns 1-3 and 5-13 by position.
df <- read.csv(
  "~/Documents/LBS/Data_Visualisation/01/2020-09/2020-09-metropolitan-stop-and-search.csv",
  na.strings = ""
) %>%
  clean_names() %>%
  dplyr::select(1:3, 5:13)

# Keep only rows without any missing value, then store the demographic
# fields as factors. Officer-defined ethnicity gets an explicit level order
# (White, Black, Asian, Other) so tables and plots list it in that order.
# describe(df_clean)  # optional summary, left disabled
df_clean <- na.omit(df) %>%
  mutate(
    gender = as.factor(gender),
    age_range = as.factor(age_range),
    officer_defined_ethnicity = factor(
      officer_defined_ethnicity,
      levels = c("White", "Black", "Asian", "Other")
    )
  )

# Number of complete records per officer-defined ethnicity.
DF <- table(df_clean$officer_defined_ethnicity)
DF
## 
## White Black Asian Other 
##  6348  5331  2759   711
# Inspect the column-selected data frame `df`, i.e. before rows with
# missing values were removed (that filtered copy lives in `df_clean`).
glimpse(df)
## Rows: 19,928
## Columns: 12
## $ type                         <chr> "Person and Vehicle search", "Person sea…
## $ date                         <chr> "2020-08-31T23:00:00+00:00", "2020-08-31…
## $ part_of_a_policing_operation <chr> "False", "False", "False", "False", "Fal…
## $ latitude                     <dbl> 51.6, 51.4, 51.5, NA, NA, NA, 51.5, 51.4…
## $ longitude                    <dbl> 0.2153, -0.2253, 0.0443, NA, NA, NA, -0.…
## $ gender                       <chr> "Female", "Male", "Male", "Male", "Male"…
## $ age_range                    <chr> "18-24", "18-24", "over 34", NA, "18-24"…
## $ self_defined_ethnicity       <chr> "White - English/Welsh/Scottish/Northern…
## $ officer_defined_ethnicity    <chr> "White", "White", "White", "Black", "Bla…
## $ legislation                  <chr> "Misuse of Drugs Act 1971 (section 23)",…
## $ object_of_search             <chr> "Controlled drugs", "Evidence of offence…
## $ outcome                      <chr> "A no further action disposal", "A no fu…

First Graph

# Register the Montserrat typeface from Google Fonts and switch on showtext
# rendering so the plots below can use it.
library(showtext)
font_add_google("Montserrat", "Montserrat")
showtext_auto()

# Count searches per officer-defined ethnicity (records with gender "Other"
# are left out) and express each count as a share of the total.
df_race <- df_clean %>%
  filter(gender != "Other") %>%
  group_by(officer_defined_ethnicity) %>%
  summarise(count = n()) %>%
  mutate(percent_race = count / sum(count))

# Two-colour palette used by the fill scales of the first two charts.
my_colours <- c("grey70", "tomato")

# Flag the "White" row so the chart can colour it differently.
df1 <- mutate(df_race, was_white = officer_defined_ethnicity == "White")


# First chart: share of stop & searches per officer-defined ethnicity, with
# the "White" bar highlighted through the `was_white` fill flag.

# Build the headline from the data instead of hard-coding the percentage, so
# the title cannot drift away from what the bars show. which() guards against
# an NA ethnicity row turning the lookup into NA.
white_share <- df1$percent_race[which(df1$officer_defined_ethnicity == "White")]
headline_title <- paste0(
  " White people accounted for ",
  scales::percent(white_share, accuracy = 0.1),
  " of all the cases"
)

ggplot(
  df1,
  aes(x = officer_defined_ethnicity, y = percent_race, fill = was_white)
) +
  # geom_col() plots the precomputed shares as-is (same as stat = "identity").
  geom_col(alpha = 0.7) +
  theme_classic() +
  theme(
    panel.grid.major.y = element_line(color = "gray60", size = 0.1),
    panel.background = element_rect(fill = "white", colour = "white"),
    axis.line = element_line(size = 1, colour = "grey80"),
    axis.ticks = element_line(size = 3, colour = "grey80"),
    axis.ticks.length = unit(.20, "cm"),
    plot.title = element_text(
      color = "tomato", size = 15, face = "bold", family = "Montserrat"
    ),
    plot.subtitle = element_text(
      color = "tomato", face = "plain", size = 10, family = "Montserrat"
    ),
    plot.caption = element_text(
      color = "grey40", face = "italic", size = 7, family = "Montserrat",
      hjust = 0
    ),
    axis.title.y = element_text(
      size = 8, angle = 90, family = "Montserrat", face = "bold"
    ),
    axis.text.y = element_text(family = "Montserrat", size = 7),
    axis.title.x = element_text(size = 8, family = "Montserrat", face = "bold"),
    axis.text.x = element_text(family = "Montserrat", size = 7),
    legend.text = element_text(family = "Montserrat", size = 7),
    legend.title = element_text(family = "Montserrat", size = 8, face = "bold"),
    # The highlight colour is explained by the title, so no legend is shown.
    legend.position = "none"
  ) +
  labs(
    title = headline_title,
    subtitle = "Proportion of Stop & Searches by Ethnicity ",
    x = "Officer defined ethnicity",
    y = " Percent",
    caption = "Source:  https://data.police.uk/data/"
  ) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = my_colours)

Second Graph

# Outcome-by-gender summary for the second chart (records with gender "Other"
# are left out).
#   count          searches per outcome/gender pair
#   total_percent  that count as a share of all searches kept after the filter
#   result         short outcome label shown on the axis
#   was_female     TRUE for the female rows (drives the fill colour)
#   percent_female female share within each outcome; NA on male rows so only
#                  the female segment gets a text label
df_gender <- df_clean %>%
  filter(gender != "Other") %>%
  group_by(outcome, gender) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  # Use the actual number of remaining rows as the denominator rather than a
  # hard-coded row count, so the shares stay correct if the data changes.
  mutate(total_percent = count / sum(count)) %>%
  group_by(outcome) %>%
  mutate(
    result = case_when(
      outcome == "A no further action disposal" ~ "No further action",
      outcome == "Arrest" ~ "Arrest",
      outcome == "Caution (simple or conditional)" ~ "Caution",
      outcome == "Community resolution" ~ "Community",
      outcome == "Penalty Notice for Disorder" ~ "Penalty",
      # Catch-all: every outcome not listed above is labelled "Summons".
      # NOTE(review): confirm no other outcome values occur in the data.
      TRUE ~ "Summons"
    ),
    was_female = gender == "Female",
    # Evaluated per outcome because of the group_by() above.
    percent_female = count / sum(count),
    percent_female = ifelse(was_female, percent_female, NA)
  ) %>%
  ungroup()


# Second chart: horizontal stacked bars of the share of all searches per
# outcome, split by gender. The number printed on each bar is the female
# share within that outcome (in percent).
ggplot(
  df_gender,
  aes(x = total_percent, y = reorder(result, total_percent), fill = was_female)
) +
  # geom_col() plots the precomputed shares as-is (same as stat = "identity").
  geom_col(alpha = 0.7) +
  theme_classic() +
  theme(
    panel.grid.major.y = element_line(color = "gray60", size = 0.1),
    panel.background = element_rect(fill = "white", colour = "white"),
    axis.line = element_line(size = 1, colour = "grey80"),
    axis.ticks = element_line(size = 3, colour = "grey80"),
    axis.ticks.length = unit(.20, "cm"),
    plot.title = element_text(
      color = "tomato", size = 9.5, face = "bold", family = "Montserrat"
    ),
    plot.subtitle = element_text(
      color = "tomato", face = "plain", size = 9, family = "Montserrat"
    ),
    plot.caption = element_text(
      color = "grey40", face = "italic", size = 7, family = "Montserrat",
      hjust = 0
    ),
    axis.title.y = element_text(
      size = 7, angle = 90, family = "Montserrat", face = "bold"
    ),
    axis.text.y = element_text(family = "Montserrat", size = 7),
    axis.title.x = element_text(size = 7, family = "Montserrat", face = "bold"),
    axis.text.x = element_text(family = "Montserrat", size = 7),
    legend.text = element_text(family = "Montserrat", size = 7),
    legend.title = element_text(family = "Montserrat", size = 8, face = "bold")
  ) +
  labs(
    title = "Women comprised the biggest percentage in S&S actions resulting with\n Community resolution",
    subtitle = " Proportion of Stop & Searches by Gender and Result",
    x = "Percent",
    y = "Result",
    fill = "Gender",
    caption = "Source:  https://data.police.uk/data/"
  ) +
  scale_x_continuous(labels = scales::percent) +
  # Fill values map in order FALSE, TRUE, i.e. Male then Female.
  scale_fill_manual(values = my_colours, labels = c("Male", "Female")) +
  geom_text(
    aes(
      label = round(percent_female, 3) * 100,
      x = round(percent_female, 3) / 2
    ),
    color = "tomato",
    size = 3,
    hjust = 0.5,
    # Male rows carry NA labels on purpose; drop them without a warning.
    na.rm = TRUE
  )

Third Graph

# Age-range / gender / object-of-search summary for the third chart.
#   count              searches per age_range/gender/object_of_search cell
#   percent_race       share of the cell within its age_range/gender group
#                      (name kept as-is; the chart below does not use it)
#   percentageoftotal  share of the cell among all searches kept after the
#                      "under 10" filter
#   object             coarser object-of-search grouping (the chart below
#                      fills by the raw `object_of_search` instead)
df3 <- df_clean %>%
  # Order the age bands from youngest to oldest before grouping, instead of
  # re-levelling a grouping column afterwards.
  mutate(
    age_range = factor(
      age_range,
      levels = c("under 10", "10-17", "18-24", "25-34", "over 34")
    )
  ) %>%
  filter(age_range != "under 10") %>%
  group_by(age_range, gender, object_of_search) %>%
  summarise(count = n()) %>%
  # summarise() leaves the data grouped by age_range and gender here.
  mutate(percent_race = count / sum(count)) %>%
  ungroup() %>%
  mutate(
    # Use the actual number of remaining rows as the denominator rather than a
    # hard-coded row count, so the shares stay correct if the data changes.
    percentageoftotal = count / sum(count),
    object_of_search = as.factor(object_of_search),
    object = case_when(
      object_of_search %in% c(
        "Anything to threaten or harm anyone",
        "Firearms",
        "Offensive weapons"
      ) ~ "Objects to threaten or harm",
      object_of_search == "Articles for use in criminal damage" ~
        "Objects for use in criminal damage",
      object_of_search == "Controlled drugs" ~ "Controlled drugs",
      object_of_search == "Evidence of offences under the Act" ~
        "Evidence of offences under the Act",
      object_of_search == "Stolen goods" ~ "Stolen goods",
      object_of_search == "Fireworks" ~ "Fireworks",
      TRUE ~ "Not Stated"
    )
  )

# Eight fill colours for the third chart's object-of-search legend.
colours_age <- c(
  "#D3C54D", "#B4D63E", "#54B4BD", "#1D445F",
  "#14848F", "#FDAF49", "#C17716", "#F58723"
)

# Third chart: stacked bars of the share of all searches per age range,
# coloured by object of search. Rows for the different genders share the same
# x/fill combination and are simply stacked together.
ggplot(
  df3,
  aes(x = age_range, y = percentageoftotal, fill = object_of_search)
) +
  # geom_col() plots the precomputed shares as-is (same as stat = "identity").
  geom_col(width = 0.7, position = "stack") +
  theme_classic() +
  theme(
    panel.grid.major.y = element_line(color = "gray60", size = 0.1),
    panel.background = element_rect(fill = "white", colour = "white"),
    axis.line = element_line(size = 1, colour = "grey80"),
    axis.ticks = element_line(size = 3, colour = "grey80"),
    axis.ticks.length = unit(.20, "cm"),
    plot.title = element_text(
      color = "grey20", size = 9.5, face = "bold", family = "Montserrat"
    ),
    plot.subtitle = element_text(
      color = "grey20", face = "plain", size = 9, family = "Montserrat"
    ),
    plot.caption = element_text(
      color = "grey40", face = "italic", size = 7, family = "Montserrat",
      hjust = 0
    ),
    axis.title.y = element_text(
      size = 8, angle = 90, family = "Montserrat", face = "bold"
    ),
    axis.text.y = element_text(family = "Montserrat", size = 7),
    axis.title.x = element_text(size = 8, family = "Montserrat", face = "bold"),
    axis.text.x = element_text(family = "Montserrat", size = 7),
    legend.text = element_text(family = "Montserrat", size = 5.5),
    legend.title = element_text(family = "Montserrat", size = 6, face = "bold")
  ) +
  labs(
    title = "Across all the age groups, controlled drugs was the main reason of search",
    subtitle = "Proportion of Stop & Searches by age and object of search",
    # The x aesthetic is age_range, so label the axis accordingly.
    x = "Age range",
    y = "Percent",
    fill = "Object of search",
    caption = "Source:  https://data.police.uk/data/"
  ) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = colours_age)

Thank you!