Author: Jaime M. Shaker
Email: [email protected]
Website: https://www.shaker.dev
LinkedIn: https://www.linkedin.com/in/jaime-shaker/
❗ If you find this repository helpful, please consider giving it a ⭐. Thanks! ❗
1. List the total number of reported crimes between 2018 and 2023?
SELECT
to_char(count(*), '9g999g999') AS "Total Reported Crimes"
FROM
chicago.crimes;
Results:
Total Reported Crimes |
---|
1,450,979 |
2. List the total amount of Homicides, Batteries and Assaults reported between 2018 and 2023.
SELECT
initcap(crime_type) AS crime_type,
count(*) AS n_crimes
FROM
chicago.crimes
WHERE
crime_type IN ('homicide', 'battery', 'assault')
GROUP BY
crime_type
ORDER BY
n_crimes DESC;
Results:
crime_type | n_crimes |
---|---|
Battery | 266357 |
Assault | 122997 |
Homicide | 4066 |
3. Which are the 3 most common crimes reported and what percentage amount are they from the total amount of reported crimes?
WITH get_top_crime AS (
SELECT
initcap(crime_type) AS crime_type,
count(*) AS n_crimes
FROM
chicago.crimes
GROUP BY
crime_type
ORDER BY
n_crimes DESC
)
SELECT
crime_type,
n_crimes,
round(100 * n_crimes::NUMERIC / sum(n_crimes) OVER (), 2) AS total_percentage
FROM
get_top_crime
LIMIT 3;
Results:
crime_type | n_crimes | total_percentage |
---|---|---|
Theft | 321957 | 22.19 |
Battery | 266357 | 18.36 |
Criminal Damage | 161766 | 11.15 |
4. What are the top ten communities that had the MOST amount of crimes reported? Include the current population, density and order by the number of reported crimes.
SELECT
initcap(t2.community_name) AS community,
t2.population,
t2.density,
count(*) AS reported_crimes
FROM
chicago.crimes AS t1
JOIN
chicago.community AS t2
ON
t2.community_id = t1.community_id
GROUP BY
t2.community_name,
t2.population,
t2.density
ORDER BY
reported_crimes DESC
LIMIT 10;
Results:
community | population | density | reported_crimes |
---|---|---|---|
Austin | 96557 | 13504.48 | 79271 |
Near North Side | 105481 | 38496.72 | 63084 |
Near West Side | 67881 | 11929.88 | 52091 |
South Shore | 53971 | 18420.14 | 49722 |
Loop | 42298 | 25635.15 | 49003 |
North Lawndale | 34794 | 10839.25 | 46155 |
Humboldt Park | 54165 | 15045.83 | 41949 |
West Town | 87781 | 19166.16 | 40772 |
Auburn Gresham | 44878 | 11903.98 | 40514 |
Greater Grand Crossing | 31471 | 8865.07 | 37429 |
5. What are the top ten communities that had the LEAST amount of crimes reported? Include the current population, density and order by the number of reported crimes.
SELECT
initcap(t2.community_name) AS community,
t2.population,
t2.density,
count(*) AS reported_crimes
FROM
chicago.crimes AS t1
JOIN
chicago.community AS t2
ON
t2.community_id = t1.community_id
GROUP BY
t2.community_name,
t2.population,
t2.density
ORDER BY
reported_crimes
LIMIT 10;
Results:
community | population | density | reported_crimes |
---|---|---|---|
Edison Park | 11525 | 10199.12 | 1623 |
Burnside | 2527 | 4142.62 | 2129 |
Forest Glen | 19596 | 6123.75 | 3135 |
Mount Greenwood | 18628 | 6873.8 | 3150 |
Montclare | 14401 | 14546.46 | 3616 |
Hegewisch | 10027 | 1913.55 | 3632 |
Oakland | 6799 | 11722.41 | 4267 |
Fuller Park | 2567 | 3615.49 | 4342 |
Archer Heights | 14196 | 7062.69 | 5036 |
Mckinley Park | 15923 | 11292.91 | 5048 |
6. What month had the most crimes reported and what was the average and median temperature high in the last six years?
ELECT
to_char(t1.reported_crime_date, 'Month') AS month,
COUNT(*) AS n_crimes,
round(avg(t2.temp_high), 1) avg_high_temp,
PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY t2.temp_high) AS median_high_temp
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
GROUP BY
month
ORDER BY
n_crimes DESC;
Results:
month | n_crimes | avg_high_temp | median_high_temp |
---|---|---|---|
July | 135240 | 85.1 | 86.0 |
August | 134712 | 84.1 | 85.0 |
October | 128470 | 62.9 | 62.0 |
June | 127774 | 81.6 | 81.0 |
September | 127567 | 77.4 | 78.0 |
May | 126130 | 72.2 | 73.0 |
December | 117531 | 41.3 | 41.0 |
November | 116688 | 48.3 | 47.0 |
March | 113630 | 47.7 | 47.0 |
January | 113133 | 33.4 | 34.0 |
April | 109391 | 57.8 | 56.0 |
February | 100713 | 36.6 | 37.0 |
7. What month had the most homicides reported and what was the average and median temperature high in the last six years?
SELECT
to_char(t1.reported_crime_date, 'Month') AS month,
COUNT(*) AS n_crimes,
round(avg(t2.temp_high), 1) avg_high_temp,
PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY t2.temp_high) AS median_high_temp
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
WHERE
t1.crime_type = 'homicide'
GROUP BY
month
ORDER BY
n_crimes DESC;
Results:
month | n_crimes | avg_high_temp | median_high_temp |
---|---|---|---|
July | 459 | 85.3 | 86.0 |
June | 432 | 82.5 | 83.0 |
September | 404 | 78.1 | 79.0 |
May | 391 | 73.1 | 74.0 |
August | 389 | 84.4 | 85.0 |
October | 350 | 63.9 | 64.0 |
April | 331 | 59.8 | 58.0 |
November | 317 | 50.2 | 49.0 |
December | 287 | 41.7 | 42.0 |
January | 256 | 33.8 | 34.0 |
February | 228 | 36.5 | 37.5 |
March | 222 | 49.1 | 48.0 |
8. List the most violent year and the number of arrests with percentage. Order by the number of crimes in decending order. Determine the most violent year by the number of reported Homicides, Assaults and Battery for that year.
WITH get_arrest_percentage AS (
SELECT
EXTRACT('year' FROM t1.reported_crime_date) AS most_violent_year,
count(*) AS reported_violent_crimes,
sum(
CASE
WHEN arrest = TRUE THEN 1
ELSE 0
END
) AS number_of_arrests
FROM
chicago.crimes AS t1
WHERE
crime_type IN ('homicide', 'battery', 'assault')
GROUP BY
most_violent_year
ORDER BY
reported_violent_crimes DESC
)
SELECT
most_violent_year,
reported_violent_crimes,
number_of_arrests || ' (' || round(100 * number_of_arrests::NUMERIC / reported_violent_crimes, 2) || '%)' AS number_of_arrests
FROM
get_arrest_percentage;
Results:
most_violent_year | reported_violent_crimes | number_of_arrests |
---|---|---|
2018 | 70835 | 13907 (19.63%) |
2019 | 70645 | 14334 (20.29%) |
2023 | 67355 | 9340 (13.87%) |
2022 | 62412 | 8165 (13.08%) |
2021 | 61611 | 7855 (12.75%) |
2020 | 60562 | 9577 (15.81%) |
9. List the day of the week, year, average precipitation, average high temperature and the highest number of reported crimes for days with and without precipitation.
DROP TABLE IF EXISTS weekday_precipitation_values;
CREATE TEMP TABLE weekday_precipitation_values AS (
SELECT
EXTRACT('year' FROM t1.reported_crime_date) AS crime_year,
to_char(t1.reported_crime_date, 'Day') AS day_of_week,
round(avg(t2.precipitation)::NUMERIC, 2) AS avg_precipitation,
round(avg(t2.temp_high)::NUMERIC, 2) AS avg_temp_high,
COUNT(*) AS n_crimes,
DENSE_RANK() OVER(PARTITION BY EXTRACT('year' FROM t1.reported_crime_date) ORDER BY count(*) DESC) AS rnk
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
WHERE
t2.precipitation > 0
GROUP BY
crime_year,
day_of_week
ORDER BY
n_crimes DESC
);
DROP TABLE IF EXISTS weekday_values;
CREATE TEMP TABLE weekday_values AS (
SELECT
EXTRACT('year' FROM t1.reported_crime_date) AS crime_year,
to_char(t1.reported_crime_date, 'Day') AS day_of_week,
round(avg(t2.precipitation)::NUMERIC, 2) AS avg_precipitation,
round(avg(t2.temp_high)::NUMERIC, 2) AS avg_temp_high,
COUNT(*) AS n_crimes,
DENSE_RANK() OVER(PARTITION BY EXTRACT('year' FROM t1.reported_crime_date) ORDER BY count(*) DESC) AS rnk
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
WHERE
t2.precipitation = 0
GROUP BY
crime_year,
day_of_week
ORDER BY
n_crimes DESC
);
SELECT
t1.crime_year,
t1.day_of_week,
t1.avg_precipitation,
t1.avg_temp_high,
t1.n_crimes,
t2.day_of_week,
t2.avg_temp_high,
t2.n_crimes,
t2.n_crimes - t1.n_crimes AS n_crime_difference
FROM
weekday_precipitation_values AS t1
JOIN
weekday_values AS t2
ON
t1.crime_year = t2.crime_year
WHERE
t1.rnk = 1
AND
t2.rnk = 1
ORDER BY
t1.crime_year;
Results:
crime_year | day_of_week | avg_precipitation | avg_temp_high | n_crimes | day_of_week | avg_temp_high | n_crimes | n_crime_difference |
---|---|---|---|---|---|---|---|---|
2018 | Monday | 0.57 | 60.23 | 16455 | Friday | 60.24 | 25667 | 9212 |
2019 | Wednesday | 0.31 | 64.00 | 19411 | Friday | 57.09 | 27042 | 7631 |
2020 | Wednesday | 0.29 | 63.54 | 11394 | Sunday | 61.51 | 23477 | 12083 |
2021 | Sunday | 0.30 | 57.12 | 10889 | Monday | 65.31 | 22840 | 11951 |
2022 | Friday | 0.21 | 51.44 | 13029 | Monday | 59.95 | 26026 | 12997 |
2023 | Friday | 0.24 | 60.14 | 15855 | Tuesday | 65.40 | 27157 | 11302 |
10. List the days with the most reported crimes when there is zero precipitation and the day when precipitation is greater than .5". Include the day of the week, high temperature, amount and precipitation and the total number of reported crimes for that day.
WITH precipitation_false AS (
SELECT
t1.reported_crime_date,
to_char(t1.reported_crime_date, 'Day') AS day_of_week,
t2.temp_high,
t2.precipitation,
count(*) AS reported_crimes
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
WHERE
t2.precipitation = 0
GROUP BY
day_of_week,
t2.precipitation,
temp_high,
t1.reported_crime_date
ORDER BY
reported_crimes DESC
LIMIT
1
),
precipitation_true AS (
SELECT
t1.reported_crime_date,
to_char(t1.reported_crime_date, 'Day') AS day_of_week,
t2.temp_high,
t2.precipitation,
count(*) AS reported_crimes
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
WHERE
t2.precipitation > .5
GROUP BY
day_of_week,
temp_high,
t2.precipitation,
t1.reported_crime_date
ORDER BY
reported_crimes DESC
LIMIT
1
)
SELECT
*
FROM
precipitation_false
UNION
SELECT
*
FROM
precipitation_true
ORDER BY
reported_crimes DESC;
Results:
reported_crime_date | day_of_week | temp_high | precipitation | reported_crimes |
---|---|---|---|---|
2020-05-31 | Sunday | 69 | 0.0 | 1899 |
2018-10-01 | Monday | 72 | 1.56 | 926 |
11. List the most consecutive days where a homicide occured between 2018-2023 and the timeframe.
DROP TABLE IF EXISTS homicide_dates;
CREATE TEMP TABLE get_homicide_dates AS (
SELECT
DISTINCT ON (reported_crime_date) reported_crime_date AS homicide_dates
FROM
chicago.crimes
WHERE
crime_type = 'homicide'
);
WITH get_rn_diff AS (
SELECT
homicide_dates,
homicide_dates - row_number() OVER ()::int AS diff
FROM
get_homicide_dates
),
get_diff_count AS (
SELECT
homicide_dates,
count(*) OVER(PARTITION BY diff) AS diff_count
FROM
get_rn_diff
GROUP BY
homicide_dates,
diff
)
SELECT
max(diff_count) AS most_consecutive_days,
min(homicide_dates) || ' to ' || max(homicide_dates) AS consecutive_days_timeframe
FROM
get_diff_count
WHERE
diff_count = (SELECT max(diff_count) FROM get_diff_count);
Results:
most_consecutive_days | consecutive_days_timeframe |
---|---|
47 | 2020-06-13 to 2020-07-29 |
12. What are the top 10 most common locations for reported crimes and the number of reported crime (add percentage) depending on the temperature?
WITH get_totals AS (
SELECT
initcap(t1.location_description) AS location_description,
count(*) AS location_description_count,
sum(
CASE
WHEN t2.temp_high >= 60 AND t2.temp_high < 90 THEN 1
ELSE 0
END) AS warm_weather,
sum(
CASE
WHEN t2.temp_high >= 90 THEN 1
ELSE 0
END) AS hot_weather,
sum(
CASE
WHEN t2.temp_high < 60 AND t2.temp_high >= 32 THEN 1
ELSE 0
END) AS cold_weather,
sum(
CASE
WHEN t2.temp_high < 32 THEN 1
ELSE 0
END) AS freezing_weather
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
WHERE
t1.location_description IS NOT NULL
GROUP BY
t1.location_description
ORDER BY
location_description_count DESC
LIMIT 10
)
SELECT
location_description,
location_description_count,
warm_weather || ' (' || round(100 * (warm_weather / location_description_count::float)::NUMERIC, 2) || ')' AS warm_weather_perc,
hot_weather || ' (' || round(100 * (hot_weather / location_description_count::float)::NUMERIC, 2) || ')' AS hot_weather_perc,
cold_weather || ' (' || round(100 * (cold_weather / location_description_count::float)::NUMERIC, 2) || ')' AS cold_weather_perc,
freezing_weather || ' (' || round(100 * (freezing_weather / location_description_count::float)::NUMERIC, 2) || ')' AS freezing_weather_perc
FROM
get_totals;
Results:
location_description | location_description_count | warm_weather_perc | hot_weather_perc | cold_weather_perc | freezing_weather_perc |
---|---|---|---|---|---|
Street | 363349 | 180260 (49.61) | 23918 (6.58) | 136783 (37.65) | 22388 (6.16) |
Apartment | 245159 | 113450 (46.28) | 15218 (6.21) | 98388 (40.13) | 18103 (7.38) |
Residence | 221626 | 101749 (45.91) | 14121 (6.37) | 89374 (40.33) | 16382 (7.39) |
Sidewalk | 92128 | 49108 (53.30) | 6962 (7.56) | 31372 (34.05) | 4686 (5.09) |
Small Retail Store | 39506 | 18481 (46.78) | 2433 (6.16) | 15701 (39.74) | 2891 (7.32) |
Restaurant | 31822 | 14396 (45.24) | 1900 (5.97) | 13033 (40.96) | 2493 (7.83) |
Alley | 30873 | 16105 (52.17) | 2172 (7.04) | 11015 (35.68) | 1581 (5.12) |
Parking Lot / Garage (Non Residential) | 30555 | 15879 (51.97) | 2041 (6.68) | 11083 (36.27) | 1552 (5.08) |
Vehicle Non-Commercial | 22795 | 10875 (47.71) | 1527 (6.70) | 8805 (38.63) | 1588 (6.97) |
Other | 22728 | 9647 (42.45) | 1251 (5.50) | 9587 (42.18) | 2243 (9.87) |
13. Calculate the year over year growth in the number of reported crimes.
WITH get_year_count AS (
SELECT
EXTRACT('year' FROM t1.reported_crime_date) AS reported_crime_year,
count(*) AS num_of_crimes
FROM
chicago.crimes AS t1
GROUP BY
reported_crime_year
)
SELECT
reported_crime_year,
num_of_crimes,
LAG(num_of_crimes) OVER (ORDER BY reported_crime_year) AS prev_year_count,
round (100 * (num_of_crimes - LAG(num_of_crimes) OVER (ORDER BY reported_crime_year)) / LAG(num_of_crimes) OVER (ORDER BY reported_crime_year)::NUMERIC, 2) AS year_over_year
FROM
get_year_count;
Results:
reported_crime_year | num_of_crimes | prev_year_count | year_over_year |
---|---|---|---|
2018 | 268816 | ||
2019 | 261293 | 268816 | -2.80 |
2020 | 212176 | 261293 | -18.80 |
2021 | 208759 | 212176 | -1.61 |
2022 | 238736 | 208759 | 14.36 |
2023 | 261199 | 238736 | 9.41 |
14. Calculate the year over year growth in the number of reported domestic violence crimes.
WITH get_year_count AS (
SELECT
EXTRACT('year' FROM t1.reported_crime_date) AS domestic_crime_year,
count(*) AS num_of_crimes
FROM
chicago.crimes AS t1
WHERE
t1.domestic = TRUE
GROUP BY
domestic_crime_year
)
SELECT
domestic_crime_year,
num_of_crimes,
LAG(num_of_crimes) OVER (ORDER BY domestic_crime_year) AS prev_year_count,
round (100 * (num_of_crimes - LAG(num_of_crimes) OVER (ORDER BY domestic_crime_year)) / LAG(num_of_crimes) OVER (ORDER BY domestic_crime_year)::NUMERIC, 2) AS domestic_yoy
FROM
get_year_count;
Results:
domestic_crime_year | num_of_crimes | prev_year_count | domestic_yoy |
---|---|---|---|
2018 | 44099 | ||
2019 | 43344 | 44099 | -1.71 |
2020 | 39983 | 43344 | -7.75 |
2021 | 45018 | 39983 | 12.59 |
2022 | 42530 | 45018 | -5.53 |
2023 | 46834 | 42530 | 10.12 |
15. Calculate the year over year growth in the number of reported domestic violence crimes.
WITH get_totals AS (
SELECT
to_char(t1.reported_crime_date, 'Month') AS total_month,
COUNT(*) AS n_crimes,
round(avg(t2.temp_high), 1) AS avg_high_temp
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
GROUP BY
total_month
)
SELECT
total_month,
n_crimes,
LAG(n_crimes) OVER (ORDER BY TO_DATE(total_month, 'Month')) AS prev_month_count,
avg_high_temp,
avg_high_temp - LAG(avg_high_temp) OVER (ORDER BY TO_DATE(total_month, 'Month')) AS avg_temp_diff,
round (100 * (n_crimes - LAG(n_crimes) OVER (ORDER BY TO_DATE(total_month, 'Month'))) / LAG(n_crimes) OVER (ORDER BY TO_DATE(total_month, 'Month'))::NUMERIC, 2) AS total_crime_growth
FROM
get_totals;
Results:
total_month | n_crimes | prev_month_count | avg_high_temp | avg_temp_diff | total_crime_growth |
---|---|---|---|---|---|
January | 113133 | 33.4 | |||
February | 100713 | 113133 | 36.6 | 3.2 | -10.98 |
March | 113630 | 100713 | 47.7 | 11.1 | 12.83 |
April | 109391 | 113630 | 57.8 | 10.1 | -3.73 |
May | 126130 | 109391 | 72.2 | 14.4 | 15.30 |
June | 127774 | 126130 | 81.6 | 9.4 | 1.30 |
July | 135240 | 127774 | 85.1 | 3.5 | 5.84 |
August | 134712 | 135240 | 84.1 | -1.0 | -0.39 |
September | 127567 | 134712 | 77.4 | -6.7 | -5.30 |
October | 128470 | 127567 | 62.9 | -14.5 | 0.71 |
November | 116688 | 128470 | 48.3 | -14.6 | -9.17 |
December | 117531 | 116688 | 41.3 | -7.0 | 0.72 |
16. List the number of crimes reported and seasonal growth for each astronomical season and what was the average temperature for each season in 2020? Use a conditional statement to display either a Gain/Loss for the season and the season over season growth.
DROP TABLE IF EXISTS yearly_seasonal_data;
CREATE TEMP TABLE yearly_seasonal_data AS (
WITH get_season_count AS (
SELECT
EXTRACT('year' FROM t1.reported_crime_date) AS crime_year,
CASE
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('01', '02', '12') THEN '1'
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('03', '04', '05') THEN '2'
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('06', '07', '08') THEN '3'
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('09', '10', '11') THEN '4'
END AS season,
CASE
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('01', '02', '12') THEN count(*)
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('03', '04', '05') THEN count(*)
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('06', '07', '08') THEN count(*)
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('09', '10', '11') THEN count(*)
END AS n_crimes,
CASE
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('01', '02', '12') THEN avg(t2.temp_high)
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('03', '04', '05') THEN avg(t2.temp_high)
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('06', '07', '08') THEN avg(t2.temp_high)
WHEN EXTRACT('month' FROM t1.reported_crime_date) IN ('09', '10', '11') THEN avg(t2.temp_high)
END AS avg_temp
FROM
chicago.crimes AS t1
JOIN
chicago.weather AS t2
ON
t1.reported_crime_date = t2.weather_date
GROUP BY
reported_crime_date,
crime_year
)
SELECT
crime_year,
season,
round(avg(avg_temp)::NUMERIC) AS avg_temp,
sum(n_crimes) AS n_crimes
FROM
get_season_count
GROUP BY
crime_year,
season
ORDER BY
crime_year, season
);
WITH get_buckets AS (
SELECT
*,
round((n_crimes - LAG(n_crimes) OVER ()) / LAG(n_crimes) OVER ()::NUMERIC, 2) AS total_crime_growth,
ntile(6) OVER (ORDER BY crime_year) AS nt
FROM
yearly_seasonal_data
)
SELECT
crime_year,
CASE
WHEN season = '1' THEN 'Winter'
WHEN season = '2' THEN 'Spring'
WHEN season = '3' THEN 'Summer'
WHEN season = '4' THEN 'Autumn'
END
,
avg_temp,
total_crime_growth,
CASE
WHEN total_crime_growth < 0 THEN 'Loss'
ELSE 'Gain'
END AS seasonal_growth
FROM
get_buckets
WHERE
nt = 3;
Results:
crime_year | case | avg_temp | total_crime_growth | seasonal_growth |
---|---|---|---|---|
2020 | Winter | 37 | -0.15 | Loss |
2020 | Spring | 59 | -0.13 | Loss |
2020 | Summer | 86 | 0.21 | Gain |
2020 | Autumn | 64 | -0.07 | Loss |
To be continued....
❗ If you find this repository helpful, please consider giving it a ⭐. Thanks! ❗