SELECT nr.relationship_name, count(distinct nv.victim_id) as victim_count FROM nibrs_victim_offender_rel nvor
join nibrs_relationship nr on nvor.relationship_id = nr.relationship_id
join nibrs_offender nio on nvor.offender_id = nio.offender_id
join nibrs_victim nv on nvor.victim_id = nv.victim_id
group by nr.relationship_name
order by victim_count desc
;
-> The majority of the relationship are unknowns or strangers, if they are known it is most likely the victims are offenders' boyfriends/girlfriends, acquaintance, or otherwise known.
SELECT nv.sex_code, count(distinct nv.victim_id) as victim_count FROM nibrs_victim nv
group by nv.sex_code
order by victim_count desc
;
-> Most of the victims are males, followed by females. There are also X genders as well, while a small number of 13344 victims are unknown/unidentified.
SELECT rr.race_desc, count(distinct nv.victim_id) as victim_count FROM nibrs_victim nv
join ref_race rr on rr.race_id = nv.race_id
group by rr.race_desc
order by victim_count desc
;
-> Most of the victims are whites, and a lot more are not specified or unknown, followed by Asian victims.
SELECT nvt.victim_type_name, count(distinct nv.victim_id) as victim_count FROM nibrs_victim nv
join nibrs_victim_type nvt on nv.victim_type_id = nvt.victim_type_id
group by nvt.victim_type_name
order by victim_count desc
;
-> A lot of the victims are individuals, business, and society/public. Other victims spread out and diverse in the type of victims as well.
SELECT nat.assignment_type_name, nvt.victim_type_name, count(distinct nv.victim_id) as victim_count FROM nibrs_victim nv
join nibrs_assignment_type nat on nv.assignment_type_id = nat.assignment_type_id
join nibrs_victim_type nvt on nv.victim_type_id = nvt.victim_type_id
group by nat.assignment_type_name, nvt.victim_type_name
order by victim_count desc
;
-> Among the victims that are law enforcement officers, lots of them died while on assignment alone or assisted, and they are on duty as one officer. Hence, it seems there's more risks doing activities with only one officer.
SELECT nat.activity_type_name, nvt.victim_type_name, count(distinct nv.victim_id) as victim_count FROM nibrs_victim nv
join nibrs_activity_type nat on nv.activity_type_id = nat.activity_type_id
join nibrs_victim_type nvt on nv.victim_type_id = nvt.victim_type_id
group by nat.activity_type_name, nvt.victim_type_name
order by victim_count desc
;
-> Most officers were victims while responding to disturbance call, attempting other arrests, or handling custody of prisoners. Other activities are present as well and less common.
### circumstances
SELECT nc.circumstances_name, count(distinct nv.victim_id) as victim_count FROM nibrs_victim nv
join nibrs_victim_circumstances nvc on nv.victim_id = nvc.victim_id
join nibrs_circumstances nc on nvc.circumstances_id = nc.circumstances_id
group by nc.circumstances_name
order by victim_count desc
;
### justifiable force
SELECT nf.justifiable_force_name, count(distinct nv.victim_id) as victim_count FROM nibrs_victim nv
join nibrs_victim_circumstances nvc on nv.victim_id = nvc.victim_id
join nibrs_justifiable_force nf on nf.justifiable_force_id = nvc.justifiable_force_id
group by nf.justifiable_force_name
order by victim_count desc
;
-> Under special circumstances, criminals were killed by either private citizen or police officer, namely because the criminals posed a danger during the incident.
select niot.offense_category_name, count(distinct nio.offense_id) as offense_count from nibrs_offense nio
join nibrs_offense_type niot on nio.offense_code = niot.offense_code
group by niot.offense_category_name
order by offense_count desc
;
-> Most of the offenses are theft offenses, assault offenses, and offenses involved drugs. It's interesting to see that Motor Vehicle Theft is a theft offense as well but it has its own seperate category.
select a.county_name, count(distinct ni.incident_id) as incident_count from agencies a
join nibrs_incident ni on a.agency_id = ni.agency_id
group by a.county_name
order by incident_count desc
;
-> Look at the top, we can see that San Diego has the highest number of incidents compared to other counties, it could possibly be because of its close proximity to the border with nearby countries.
select niot.offense_category_name, count(distinct ni.incident_id) as incident_count from nibrs_incident ni
join nibrs_offense nio on nio.incident_id = ni.incident_id
join nibrs_offense_type niot on niot.offense_code = nio.offense_code
group by niot.offense_category_name
order by incident_count desc
;
-> Theft offenses top the list. The statistics are slightly different compared to number of offenses per offense category because there could be multiple offenses per one incident. Hence, in one vandalism incident for example, there could be several drug offenses involved.
with cte as (SELECT incident_id, count(distinct offense_id) as offense_count FROM nibrs_offense
group by incident_id
order by offense_count desc)
select round(avg(offense_count), 2) as average_offense_number from cte
;
-> On average, there is typically one offense committed in each incident.
with cte as (SELECT incident_id, count(distinct offense_id) as offense_count FROM nibrs_offense
group by incident_id
order by offense_count desc)
select offense_count, count(offense_count) as offense_count from cte
group by offense_count
order by offense_count desc
;
-> Typically, there's one offense in each incident, while there are 3 incidents with maximum 8 offenses each.
select ni.injury_name, count(distinct nvj.victim_id) as victim_count from nibrs_victim_injury nvj
join nibrs_injury ni on nvj.injury_id = ni.injury_id
group by ni.injury_name
order by victim_count desc
;
-> The majority of victims sustained none or minor injuries.
select nwt.weapon_name, count(distinct nio.incident_id) as incident_count from nibrs_weapon nw
join nibrs_weapon_type nwt on nw.weapon_id = nwt.weapon_id
join nibrs_offense nio on nio.offense_id = nw.offense_id
group by nwt.weapon_name
order by incident_count desc
;
-> A variety of weapons were used, most of them were personal weapons or other types of weapons.
select nbl.bias_name, count(distinct nvj.victim_id) as offense_count from nibrs_bias_list nbl
join nibrs_bias_motivation nbm on nbl.bias_id = nbm.bias_id
join nibrs_offense nio on nio.offense_id = nbm.offense_id
join nibrs_victim nv on nv.incident_id = nio.incident_id
join nibrs_victim_injury nvj on nvj.victim_id = nv.victim_id
join nibrs_injury ni on ni.injury_id = nvj.injury_id
where ni.injury_name = 'None'
group by nbl.bias_name
order by offense_count desc
;
-> It's rare for a bias to occur, and bias is subjective so it's hard to classify. Hence, the majority of biases are none or unknown.
# victim count
select DISTINCT
CASE
when age_num = 'NS' then 'Not Specified'
when age_num = 'BB' then '7-364 Days Old'
when age_num = 'NB' then '1-6 Days Old'
when age_num = 'NN' then 'Under 24 Hours'
when age_num = 00 then 'Unknown'
when age_num = '99' then 'Over 98 Years Old'
WHEN age_num < 20 THEN 'Under 20'
WHEN age_num BETWEEN 20 AND 29 THEN '20-29'
WHEN age_num BETWEEN 30 AND 39 THEN '30-39'
WHEN age_num BETWEEN 40 AND 49 THEN '40-49'
WHEN age_num BETWEEN 50 AND 59 THEN '50-59'
WHEN age_num >= 60 THEN '60 and above'
END Age_Group, count(distinct victim_id) as victim_count from nibrs_victim
group by Age_group
order by victim_count desc;
# offender count
select DISTINCT
CASE
when age_num = 'NS' then 'Not Specified'
when age_num = 'BB' then '7-364 Days Old'
when age_num = 'NB' then '1-6 Days Old'
when age_num = 'NN' then 'Under 24 Hours'
when age_num = 00 then 'Unknown'
when age_num = '99' then 'Over 98 Years Old'
WHEN age_num < 20 THEN 'Under 20'
WHEN age_num BETWEEN 20 AND 29 THEN '20-29'
WHEN age_num BETWEEN 30 AND 39 THEN '30-39'
WHEN age_num BETWEEN 40 AND 49 THEN '40-49'
WHEN age_num BETWEEN 50 AND 59 THEN '50-59'
WHEN age_num >= 60 THEN '60 and above'
END Age_Group, count(distinct offender_id) as offender_count from nibrs_offender
group by Age_group
order by offender_count desc;
-> Age of victims and offenders are not specified most of the time. If their age were specified, the age range was most likely in the range 20 to 49.
with cte as (select DISTINCT
CASE
when age_num = 'NS' then 'Not Specified'
when age_num = 'BB' then '7-364 Days Old'
when age_num = 'NB' then '1-6 Days Old'
when age_num = 'NN' then 'Under 24 Hours'
when age_num = 00 then 'Unknown'
when age_num = '99' then 'Over 98 Years Old'
WHEN age_num < 20 THEN 'Under 20'
WHEN age_num BETWEEN 20 AND 29 THEN '20-29'
WHEN age_num BETWEEN 30 AND 39 THEN '30-39'
WHEN age_num BETWEEN 40 AND 49 THEN '40-49'
WHEN age_num BETWEEN 50 AND 59 THEN '50-59'
WHEN age_num >= 60 THEN '60 and above'
END Age_Group, victim_id from nibrs_victim)
select cte.Age_Group, nwt.weapon_name, count(distinct cte.victim_id) as victim_count from cte
join nibrs_victim_offense nvo on cte.victim_id = nvo.victim_id
join nibrs_weapon nw on nw.offense_id = nvo.offense_id
join nibrs_weapon_type nwt on nwt.weapon_id = nw.weapon_id
where Age_Group in ('1-6 Days Old', 'Under 24 Hours', '7-364 Days Old')
group by cte.Age_Group, nwt.weapon_name
order by victim_count desc
;
-> Most weapons were personal weapons
with cte as (select DISTINCT
CASE
when age_num = 'NS' then 'Not Specified'
when age_num = 'BB' then '7-364 Days Old'
when age_num = 'NB' then '1-6 Days Old'
when age_num = 'NN' then 'Under 24 Hours'
when age_num = 00 then 'Unknown'
when age_num = '99' then 'Over 98 Years Old'
WHEN age_num < 20 THEN 'Under 20'
WHEN age_num BETWEEN 20 AND 29 THEN '20-29'
WHEN age_num BETWEEN 30 AND 39 THEN '30-39'
WHEN age_num BETWEEN 40 AND 49 THEN '40-49'
WHEN age_num BETWEEN 50 AND 59 THEN '50-59'
WHEN age_num >= 60 THEN '60 and above'
END Age_Group, victim_id from nibrs_victim)
select cte.Age_Group, ni.injury_name, count(distinct cte.victim_id) as victim_count from cte
join nibrs_victim_injury nvi on cte.victim_id = nvi.victim_id
join nibrs_injury ni on ni.injury_id = nvi.injury_id
where Age_Group in ('1-6 Days Old', 'Under 24 Hours', '7-364 Days Old')
group by cte.Age_Group, ni.injury_name
order by victim_count desc
;
-> Most of them had no injuries.
select nlt.location_name, count(distinct nio.offense_id) as offense_count from nibrs_offense nio
join nibrs_location_type nlt on nio.location_id = nlt.location_id
group by nlt.location_name
order by offense_count desc
;
-> Look at the top 3, most crimes took place at a residence, on the street, and parking areas.
# incidents
select date_format(incident_date, '%m') as converted_month, count(distinct incident_id) as incident_count from nibrs_incident
group by converted_month
order by incident_count desc
;
# arrests
select date_format(arrest_date, '%m') as converted_month, count(distinct arrestee_id) as arrestee_count from nibrs_arrestee
group by converted_month
order by arrestee_count desc
;
-> Interestingly, most incidents and arrests made were found in the second half of the year which reached its peak in the month of October and December. It could be because there were more holidays and summer seasons during this time period, hence resulting in more crimes committed since there were more human interactions.
21. On average per offense, county, race, and gender, how long does it take to submit the report since the incident and arrest date?
select niot.offense_name, round(avg(datediff(date(ni.submission_date), date(ni.incident_date))), 2)
as avg_days_submitted_since_the_incident,
round(avg(datediff(date(ni.submission_date), date(na.arrest_date))), 2) as avg_days_submitted_since_the_arrest,
round(avg(datediff(date(na.arrest_date), date(ni.incident_date))), 2) as avg_days_arrested_since_the_incident
from nibrs_incident ni
join nibrs_arrestee na on ni.incident_id = na.incident_id
join nibrs_offense_type niot on niot.offense_code = na.offense_code
group by niot.offense_name
order by niot.offense_name
;
-> On average, Welfare Fraud took the longest to arrest the criminals since the incident. Human Trafficking took the longest to submit the report since the incident. Negligent Manslaughter took the longest to submit the report since the arrest. The more complicated a crime is, the more likely reports took longer to prepare and submit. Difficult and hard-to-track cases or require long investigations also took long time to arrest since the incident as well.
select a.county_name, round(avg(datediff(date(ni.submission_date), date(ni.incident_date))), 2)
as avg_days_submitted_since_the_incident,
round(avg(datediff(date(ni.submission_date), date(na.arrest_date))), 2) as avg_days_submitted_since_the_arrest,
round(avg(datediff(date(na.arrest_date), date(ni.incident_date))), 2) as avg_days_arrested_since_the_incident
from nibrs_incident ni
join nibrs_arrestee na on ni.incident_id = na.incident_id
join agencies a on a.agency_id = ni.agency_id
group by a.county_name
order by a.county_name
;
-> SAN JOAQUIN county took the longest to make the arrest since the incident. CALAVERAS county took the longest to submit the reports since the incident and arrest.
select rr.race_desc, round(avg(datediff(date(ni.submission_date), date(ni.incident_date))), 2)
as avg_days_submitted_since_the_incident,
round(avg(datediff(date(ni.submission_date), date(na.arrest_date))), 2) as avg_days_submitted_since_the_arrest,
round(avg(datediff(date(na.arrest_date), date(ni.incident_date))), 2) as avg_days_arrested_since_the_incident
from nibrs_incident ni
join nibrs_arrestee na on ni.incident_id = na.incident_id
join ref_race rr on rr.race_id = na.race_id
group by rr.race_desc
order by rr.race_desc
;
-> Native Hawaiian or Other Pacific Islander took the longest to arrest since the incident. American Indian or Alaska Native took the longest to submit the reports since the incident and arrest, followed by Native Hawaiian or Other Pacific Islander. It could be because they live in tribal community or hard-to-reach places so it takes time to track the cases.
select na.sex_code, round(avg(datediff(date(ni.submission_date), date(ni.incident_date))), 2)
as avg_days_submitted_since_the_incident,
round(avg(datediff(date(ni.submission_date), date(na.arrest_date))), 2) as avg_days_submitted_since_the_arrest,
round(avg(datediff(date(na.arrest_date), date(ni.incident_date))), 2) as avg_days_arrested_since_the_incident
from nibrs_incident ni
join nibrs_arrestee na on ni.incident_id = na.incident_id
group by na.sex_code
order by na.sex_code
;
-> On average, male criminals took the longest to arrest and submit the reports compared to female criminals.
select date_format(ni.incident_date, '%m') as converted_month, niot.offense_category_name, count(distinct nio.offense_id) as offense_count
from nibrs_incident ni
join nibrs_offense nio on nio.incident_id = ni.incident_id
join nibrs_offense_type niot on niot.offense_code = nio.offense_code
group by converted_month, niot.offense_category_name
order by offense_count desc
limit 10
;
-> Look at the top 10, we can see that most of the offenses were theft offenses and took place in the second half of the year.
23. Why some counties took longer to submit and arrest? Let's find out statistics related to suburban area, total officers, and total civilian by county.
On average, how many counties took longer than 100 days to submit and longer than 5 days to arrest if the area is suburban or not?
with cte as (select a.county_name, a.suburban_area_flag, round(avg(datediff(date(ni.submission_date), date(ni.incident_date))), 2)
as avg_days_submitted_since_the_incident,
round(avg(datediff(date(ni.submission_date), date(na.arrest_date))), 2) as avg_days_submitted_since_the_arrest,
round(avg(datediff(date(na.arrest_date), date(ni.incident_date))), 2) as avg_days_arrested_since_the_incident
from nibrs_incident ni
join nibrs_arrestee na on ni.incident_id = na.incident_id
join agencies a on a.agency_id = ni.agency_id
group by a.county_name, a.suburban_area_flag
order by a.county_name)
select suburban_area_flag, count(suburban_area_flag) as counties_count from cte
where avg_days_submitted_since_the_incident > 100 and avg_days_submitted_since_the_arrest > 100
group by suburban_area_flag
order by counties_count desc
;
with cte as (select a.county_name, a.suburban_area_flag, round(avg(datediff(date(ni.submission_date), date(ni.incident_date))), 2)
as avg_days_submitted_since_the_incident,
round(avg(datediff(date(ni.submission_date), date(na.arrest_date))), 2) as avg_days_submitted_since_the_arrest,
round(avg(datediff(date(na.arrest_date), date(ni.incident_date))), 2) as avg_days_arrested_since_the_incident
from nibrs_incident ni
join nibrs_arrestee na on ni.incident_id = na.incident_id
join agencies a on a.agency_id = ni.agency_id
group by a.county_name, a.suburban_area_flag
order by a.county_name)
select suburban_area_flag, count(suburban_area_flag) as counties_count from cte
where avg_days_submitted_since_the_incident > 100 and avg_days_submitted_since_the_arrest > 100
and avg_days_arrested_since_the_incident > 5
group by suburban_area_flag
order by counties_count desc
;
-> On average, if the area is not suburban, it is likely that the days submitted and arrested took longer than if the area is suburban. While this insight helps us discover facts regarding suburban areas, it still has not solved the mystery of why some counties such as Santa Clara took longer to submit and arrest even if the area is suburban. So let's look at other metrics
with cte2 as (with cte1 as (with cte as (select county_name, sum(male_officer) as total_male_officer_count, sum(male_civilian) as total_male_civilian_count,
sum(male_total) as total_male_count, sum(female_officer) as total_female_officer_count, sum(female_civilian) as total_female_civilian_count,
sum(female_total) as total_female_count
from agencies
group by county_name)
select county_name, total_male_officer_count + total_female_officer_count as total_officer_count, total_male_civilian_count +
total_female_civilian_count as total_civilian_count
from cte
order by county_name)
select county_name, round(total_officer_count/total_civilian_count, 2) as officer_to_civilian_ratio from cte1),
cte3 as (select a.county_name, round(avg(datediff(date(ni.submission_date), date(ni.incident_date))), 2)
as avg_days_submitted_since_the_incident,
round(avg(datediff(date(ni.submission_date), date(na.arrest_date))), 2) as avg_days_submitted_since_the_arrest,
round(avg(datediff(date(na.arrest_date), date(ni.incident_date))), 2) as avg_days_arrested_since_the_incident
from nibrs_incident ni
join nibrs_arrestee na on ni.incident_id = na.incident_id
join agencies a on a.agency_id = ni.agency_id
group by a.county_name
order by a.county_name)
select cte2.county_name, cte2.officer_to_civilian_ratio, cte3.avg_days_submitted_since_the_incident,
cte3.avg_days_submitted_since_the_arrest, cte3.avg_days_arrested_since_the_incident from cte2
join cte3 on cte2.county_name = cte3.county_name
;
select @ax := avg(officer_to_civilian_ratio),
@ay := avg(avg_days_submitted_since_the_incident),
@div := (stddev_samp(officer_to_civilian_ratio) * stddev_samp(avg_days_submitted_since_the_incident))
from officer_to_civilian_ratio_per_county;
select sum((officer_to_civilian_ratio - @ax) * (avg_days_submitted_since_the_incident - @ay) )/((count(officer_to_civilian_ratio) -1) * @div)
as correlation
from officer_to_civilian_ratio_per_county;
-> They have a weak negative relationship
select @ax := avg(officer_to_civilian_ratio),
@ay := avg(avg_days_submitted_since_the_arrest),
@div := (stddev_samp(officer_to_civilian_ratio) * stddev_samp(avg_days_submitted_since_the_arrest))
from officer_to_civilian_ratio_per_county;
select sum((officer_to_civilian_ratio - @ax) * (avg_days_submitted_since_the_arrest - @ay) )/((count(officer_to_civilian_ratio) -1) * @div)
as correlation
from officer_to_civilian_ratio_per_county;
-> They have a weak negative relationship
select @ax := avg(officer_to_civilian_ratio),
@ay := avg(avg_days_arrested_since_the_incident),
@div := (stddev_samp(officer_to_civilian_ratio) * stddev_samp(avg_days_arrested_since_the_incident))
from officer_to_civilian_ratio_per_county;
select sum((officer_to_civilian_ratio - @ax) * (avg_days_arrested_since_the_incident - @ay) )/((count(officer_to_civilian_ratio) -1) * @div)
as correlation
from officer_to_civilian_ratio_per_county;
-> They have a weak positive relationship
->> In conclusion, the ratio has nothing to do with the processing time of submission and arrest. So it seems it is hard to find out exactly reasons why some counties took longer other than the reasons related to suburban areas explained above. There are many other metrics we can investigate, it will be a topic for another day.