By Aaron Margolis
This notebook updates a Mean Shift Analysis I first performed in 2017 to examine the recent increases in crime following the historic decrease of the late 1990s and early 2000s. For instance, was crime increasing everywhere, or just in certain cities, such as Baltimore? By grouping cities into clusters based on their crime patterns over time, we can see where crime is continuing to fall and where it is rising.
This notebook looks at crime rates in jurisdictions with over 250,000 people. These 131 jurisdictions account for approximately 30% of the US population. They are a mix of urban areas, such as cities, and suburban counties. Urban areas are over-represented, but there are enough lower-density jurisdictions to conduct analysis.
This analysis can be expanded to look at smaller jurisdictions, especially using a more powerful backend. I incorporated TensorFlow 2.0 and its eager execution capability. Because there are only 600 columns for 131 jurisdictions, or 78,600 data points, this notebook uses CPUs rather than GPUs or TPUs. If more jurisdictions were incorporated, a more powerful backend could be added.
Results:
Where crime was highest in the late 1990s, such as in New York and other large cities, crime continues to be down considerably. But in smaller, less dense jurisdictions, such as Anchorage and Wichita, the amount of crime today is much higher than it was 25 years ago, despite the large overall decrease in crime. Most of these jurisdictions have seen an increase in the last few years. The large variation in crime trends across jurisdictions explains the differing perceptions of crime overall.
Methodology
We start by loading CSV files that I created using an API on cloud.gov's Crime Data Explorer, which hosts FBI Uniform Crime Reporting data in a computer-friendly format. ORI stands for Originating Agency Identifier, which identifies the police department providing the data.
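As background, here is a hypothetical sketch of how such a CSV could be assembled from the Crime Data Explorer API. The endpoint path, response shape, and API key below are placeholders for illustration, not the exact calls used to build these files:

```python
import pandas as pd
import requests

# Placeholder endpoint and key -- illustrative only, not the actual API contract
BASE = 'https://api.usa.gov/crime/fbi/sapi'
API_KEY = 'YOUR_API_KEY'

def fetch_ori(ori, since=1995, until=2019):
    # Request summarized offense counts for one reporting agency (ORI)
    url = f'{BASE}/api/summarized/agencies/{ori}/offenses/{since}/{until}'
    resp = requests.get(url, params={'api_key': API_KEY})
    resp.raise_for_status()
    # 'results' key is an assumption about the response shape
    return pd.DataFrame(resp.json()['results'])

# frames = [fetch_ori(ori) for ori in list_of_oris]
# pd.concat(frames).to_csv('ori_over_250k_full.csv')
```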
import pandas as pd
ori_guide=pd.read_csv('https://raw.githubusercontent.com/ARMargolis/UCRanalysis/main/ORI.csv').set_index('ori')
raw_ori_data=pd.read_csv('https://raw.githubusercontent.com/ARMargolis/UCRanalysis/main/ori_over_250k_full.csv')
raw_ori_data.head()
| | Unnamed: 0 | ori | data_year | offense | state_abbr | cleared | actual |
|---|---|---|---|---|---|---|---|
| 0 | 0 | NC0410200 | 1995 | homicide | NC | 23 | 36 |
| 1 | 1 | NC0410200 | 1995 | aggravated-assault | NC | 578 | 1189 |
| 2 | 2 | NC0410200 | 1995 | arson | NC | 34 | 133 |
| 3 | 3 | NC0410200 | 1995 | burglary | NC | 283 | 3671 |
| 4 | 4 | NC0410200 | 1995 | human-trafficing | NC | 0 | 0 |
Each row includes a police department identifier (ORI), a year, a crime, the number of offenses that were reported (actual), and the number cleared, usually by arrest (cleared). This method already introduces uncertainty about the true level of crime, because many crimes go unreported.
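As a quick sketch of how the two counts relate, we could compute an overall clearance rate (the share of reported offenses that were cleared) per offense:

```python
# Sketch: aggregate clearance rate by offense across all departments and years
clearance = raw_ori_data.groupby('offense')[['cleared', 'actual']].sum()
clearance['clearance_rate'] = clearance['cleared'] / clearance['actual']
print(clearance.sort_values('clearance_rate', ascending=False))
```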
We will use a pivot table to arrange the data along four axes (ORI, year, crime, actual vs. cleared).
ori_data_pivot=raw_ori_data.pivot_table(index='ori', columns=['data_year','offense'], values=['cleared', 'actual'])
ori_data_pivot.head()
```
              actual                                      ...  cleared
data_year       1995                                      ...     2019
offense       aggravated-assault   arson  burglary  ...   ...  robbery  violent-crime
ori                                                 ...
AK0010100               1462.0     96.0     2521.0  ...   ...    196.0         1742.0
AKAST0100                  NaN      NaN        NaN  ...   ...     44.0          889.0
AL0020100                757.0     77.0     4201.0  ...   ...      NaN            NaN
AZ0070000                426.0     15.0     1526.0  ...   ...      4.0          109.0
AZ0070500                243.0     84.0     1677.0  ...   ...     32.0          250.0

5 rows × 600 columns
```
Next we look at the null values. For a given year, a police department either reports no values or all 24 (12 offenses, each with an actual and a cleared count), so the null counts should be multiples of 24. Let's see which departments have the most null results.
most_nulls=ori_data_pivot.isnull().sum(axis=1).sort_values(ascending=False).head(10)
print(most_nulls)
ori_guide.loc[most_nulls.index, 'agency_name']
```
ori
AKAST0100    456
KY0568000    192
NC0920100     72
NY0510100     72
NY0290000     72
MDBPD0000     24
FL0500000     24
FL0510000     24
FL0520000     24
OHCIP0000     24
dtype: int64
```
```
ori
AKAST0100                        State Troopers
KY0568000    Louisville Metro Police Department
NC0920100             Raleigh Police Department
NY0510100      Suffolk County Police Department
NY0290000       Nassau County Police Department
MDBPD0000           Baltimore Police Department
FL0500000    Palm Beach County Sheriff's Office
FL0510000         Pasco County Sheriff's Office
FL0520000      Pinellas County Sheriff's Office
OHCIP0000          Cincinnati Police Department
Name: agency_name, dtype: object
```
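As a quick sanity check on the multiples-of-24 claim (a sketch, not part of the original pipeline):

```python
# Every department's null count should be a whole number of missing years (24 values each)
null_counts = ori_data_pivot.isnull().sum(axis=1)
assert (null_counts % 24 == 0).all()
```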
We will remove the 5 police departments that have multiple missing years (Alaska State Troopers, Louisville, Raleigh and two Long Island counties). These null values show the importance of using jurisdiction data rather than state data: Kentucky and North Carolina will show much lower crime rates in years where their major cities did not provide data. Even the one year where Cincinnati did not provide data may affect analysis of Ohio crime data.
ori_data_final=ori_data_pivot.drop(most_nulls.index[:5])
For the departments missing only a single year, we will interpolate.
ori_data_final=ori_data_final.interpolate(method='linear', axis=0)
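As a small standalone illustration of what linear interpolation does (here, axis=0 runs the interpolation down each column of the pivot table), a lone NaN is replaced by the midpoint of its neighbors along the interpolation axis:

```python
# A single missing value becomes the average of the values on either side
print(pd.Series([1.0, float('nan'), 3.0]).interpolate(method='linear'))
# 0    1.0
# 1    2.0
# 2    3.0
```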
To ease comparison, we will look at crime rates per 100,000 people.
for row_num in range(ori_data_final.shape[0]):
    ori_data_final.iloc[row_num]*=100000/ori_guide.loc[ori_data_final.index[row_num], 'population']
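For reference, the same arithmetic can be done without the Python loop. This sketch would replace, not follow, the loop above:

```python
# Vectorized equivalent: divide each row by its jurisdiction's population
populations = ori_guide.loc[ori_data_final.index, 'population']
ori_data_final = ori_data_final.div(populations, axis=0) * 100000
```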
Before going from a Pandas dataframe to TensorFlow, we need to reshape the data via NumPy to expose all four axes. We will print the last 24 values of the first row in both Pandas and NumPy to confirm the reshaping is correct.
print(ori_data_final.iloc[0,-24:])
ori_np=ori_data_final.values.reshape(131,2,25,12)
ori_np[0,-1,-2:,:]
```
         data_year  offense
cleared  2018       aggravated-assault     507.238439
                    arson                    4.979435
                    burglary               103.240284
                    homicide                 4.647473
                    human-trafficing         0.000000
                    larceny                433.210839
                    motor-vehicle-theft    153.034634
                    property-crime         689.485757
                    rape                    40.831366
                    rape-legacy              0.000000
                    robbery                 91.621603
                    violent-crime          644.338880
         2019       aggravated-assault     485.660887
                    arson                    7.967096
                    burglary                95.605151
                    homicide                 6.971209
                    human-trafficing         0.000000
                    larceny                373.125658
                    motor-vehicle-theft     88.965904
                    property-crime         557.696713
                    rape                    20.581664
                    rape-legacy              0.000000
                    robbery                 65.064616
                    violent-crime          578.278377
Name: AK0010100, dtype: float64
```
```
array([[507.23843858,   4.97943493, 103.24028429,   4.6474726 ,
          0.        , 433.21083923, 153.03463363, 689.48575716,
         40.83136646,   0.        ,  91.62160278, 644.33888042],
       [485.6608872 ,   7.96709589,  95.60515073,   6.97120891,
          0.        , 373.1256577 ,  88.96590415, 557.69671258,
         20.58166439,   0.        ,  65.06461647, 578.27837697]])
```
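The two printouts can also be compared programmatically (a sketch of the same check):

```python
import numpy as np

# The flattened last two year-blocks of the first ORI should match the Pandas view
assert np.allclose(ori_np[0, -1, -2:, :].ravel(), ori_data_final.iloc[0, -24:].values)
```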
We import TensorFlow, convert the NumPy array to a TensorFlow variable, and then normalize it.
import tensorflow as tf
ori_tf=tf.Variable(ori_np, dtype=tf.float32)
ori_norm_tf=tf.keras.utils.normalize(ori_tf)
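Note that `tf.keras.utils.normalize` applies an L2 norm along the last axis by default, so each innermost vector of 12 offense rates is scaled to unit length. A quick sketch to confirm:

```python
# Each innermost 12-offense vector should now have (near) unit L2 norm
print(tf.norm(ori_norm_tf, axis=-1))  # values of 1.0 (or 0.0 for any all-zero rows)
```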
Now we are going to perform Mean Shift Analysis in TensorFlow. The concept is to gradually shift each data point closer to its neighbors until all the points converge with their neighbors. This implementation uses a Gaussian function with a given "bandwidth" to weight the nearer neighbors more heavily. We implement it in TensorFlow because the process is O(r²·c), where r is the number of rows and c is the number of columns. The cluster_step function returns both the new data and the squared magnitude of the change.
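Written out (a sketch of the update the code below implements, applied to each coordinate $c$ independently rather than by full Euclidean distance, with bandwidth $h$):

$$x_{i,c} \leftarrow x_{i,c} + \sum_{j} \exp\!\left(-\frac{(x_{j,c}-x_{i,c})^2}{2h^2}\right)(x_{j,c}-x_{i,c})$$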
import numpy as np

def cluster_step(data, bandwidth):
    # One mean-shift step: returns the shifted data and the squared magnitude of the shift
    change=np.zeros(data.shape)
    for x in range(data.shape[0]):
        # Vector from point x to every other point
        difference=tf.math.subtract(data, tf.broadcast_to(tf.gather(data, x), data.shape))
        # Per-coordinate Gaussian weights with the given bandwidth
        distance=tf.scalar_mul(-0.5/bandwidth**2, tf.math.square(difference))
        change[x]=tf.reduce_sum(tf.multiply(tf.exp(distance), difference), axis=0).numpy()
    # Shift each point toward the weighted mean of its neighbors
    return tf.math.add(data, tf.constant(change, dtype=data.dtype)), np.square(change).sum()
We will keep clustering until the sum of squared shifts drops below 0.01², i.e. until the root-sum-square change is less than 0.01, which is also the value we use for the bandwidth. We will also note the time.
from time import ctime

dist_sq=1
count=0
new_ori_tf=ori_norm_tf
print('Start', ctime())
while dist_sq>0.01*0.01:
    new_ori_tf, dist_sq=cluster_step(new_ori_tf, 0.01)
    count+=1
    if count%500==0:
        print(count, dist_sq, ctime())
print('Done', dist_sq, ctime())
```
Start Wed Jan 13 20:39:02 2021
500 0.02276879082391721 Wed Jan 13 20:40:14 2021
1000 0.006877025073071565 Wed Jan 13 20:41:23 2021
1500 0.0019519331326280913 Wed Jan 13 20:42:33 2021
2000 0.0006848493218250168 Wed Jan 13 20:43:42 2021
2500 0.0004287393047034695 Wed Jan 13 20:44:51 2021
3000 0.00031875683125860536 Wed Jan 13 20:45:59 2021
3500 0.00024394157005310326 Wed Jan 13 20:47:07 2021
4000 0.00019083634949514338 Wed Jan 13 20:48:15 2021
4500 0.00015201034493367795 Wed Jan 13 20:49:22 2021
5000 0.00012297801430013615 Wed Jan 13 20:50:33 2021
5500 0.00010086351100298652 Wed Jan 13 20:51:41 2021
Done 9.997841607564483e-05 Wed Jan 13 20:51:44 2021
```
Now that TensorFlow has done the math-intensive part, we use sklearn to label the points based on where their means have shifted.
from sklearn.cluster import AffinityPropagation
X=new_ori_tf.numpy().reshape([131,600])
clustering = AffinityPropagation(damping=0.95, max_iter=1000).fit(X)
clustering.labels_
array([ 1, 6, 8, 7, 7, 6, 9, 2, 0, 8, 0, 6, 0, 0, 0, 3, 0, 7, 0, 8, 8, 8, 0, 0, 6, 0, 6, 8, 8, 1, 1, 0, 3, 4, 2, 3, 2, 4, 2, 3, 2, 3, 3, 2, 3, 2, 2, 3, 3, 3, 3, 3, 7, 6, 6, 6, 4, 10, 5, 10, 0, 1, 6, 6, 4, 4, 2, 6, 10, 10, 8, 4, 0, 3, 6, 9, 8, 0, 9, 7, 6, 6, 8, 8, 0, 2, 8, 3, 0, 5, 8, 3, 0, 8, 8, 8, 6, 4, 8, 3, 4, 2, 9, 8, 3, 7, 9, 0, 8, 3, 9, 9, 9, 6, 9, 7, 8, 8, 9, 7, 10, 10, 10, 10, 10, 10, 1, 3, 3, 6, 8])
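As a quick look at the result (a sketch), we can count how many jurisdictions landed in each of the 11 clusters:

```python
import numpy as np

# Cluster sizes, indexed by label 0-10
print(np.bincount(clustering.labels_))
```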
We group the jurisdictions by creating a list of lists.
lbl_lists=[]
# Strip the last two words (e.g. 'Police Department') from agency names for readability
drop_last2words=lambda s:' '.join(s.split(' ')[:-2])
for lbl in range(clustering.labels_.max()+1):
    lbl_lists.append([x for x in range(ori_data_final.shape[0]) if clustering.labels_[x]==lbl])
    print(lbl, [drop_last2words(ori_guide.loc[ori_data_final.index[x], 'agency_name']) for x in lbl_lists[-1]])
```
0 ['Oakland', 'Kern County', 'Los Angeles County', 'Long Beach', 'Los Angeles', 'Santa Ana', 'Riverside County', 'San Bernardino County', 'San Diego County', 'San Diego', 'Denver', 'Indianapolis', 'Detroit', 'St. Louis', 'Newark', 'Buffalo', 'Cleveland', 'Fort Bend County']
1 ['Anchorage', 'Aurora', 'Colorado Springs', 'Wichita', 'King County']
2 ['Tucson', 'New Castle County', 'Miami-Dade County', 'Jacksonville', 'Hillsborough County', 'Manatee County', 'Orange County', 'Orlando', 'Anne Arundel County', 'Albuquerque', 'Nashville Metropolitan']
3 ['Anaheim', 'Connecticut', 'Collier County', 'Escambia County', 'Tampa', 'Lee County', 'Marion County', 'Palm Beach County', 'Pasco County', 'Pinellas County', 'St. Petersburg', 'Polk County', 'Minneapolis', 'Henderson', 'Cincinnati', 'Greenville County', 'Bexar County', 'Hidalgo County', 'Pierce County', 'Snohomish County']
4 ['Washington', 'Miami', 'Atlanta', 'New Orleans', 'Boston', 'Baltimore', 'Philadelphia', 'Richland County']
5 ['Chicago', 'New York City']
6 ['Mobile', 'Phoenix', 'Bakersfield', 'Chula Vista', 'San Francisco', 'Cobb County', 'DeKalb County', 'Gwinnett County', 'Lexington', 'Jefferson County', 'Baltimore County', 'St. Paul', 'Durham', 'Greensboro', 'Portland', 'Fort Worth', 'Seattle']
7 ['Chandler', 'Mesa', 'Irvine', 'Sarasota County', 'Lincoln', 'Plano', 'Laredo', 'Salt Lake County Unified']
8 ['Maricopa County', 'Fresno', 'Riverside', 'Sacramento County', 'Sacramento', 'Stockton', 'San Jose', "Prince George's County", 'Kansas City', 'Charlotte-Mecklenburg', 'Jersey City', 'Las Vegas Metropolitan Police Department', 'Toledo', 'Columbus', 'Oklahoma City', 'Tulsa', 'Pittsburgh Bureau', 'Memphis', 'Harris County', 'Dallas', 'Houston', 'Milwaukee']
9 ['Pima County', 'St. Louis County', 'Omaha', 'Knox County', 'El Paso', 'Montgomery County', 'Corpus Christi', 'Arlington', 'Austin', 'San Antonio']
10 ['Honolulu', 'Fort Wayne', 'Howard County', 'Montgomery County', 'Chesterfield County', 'Fairfax County', 'Henrico County', 'Loudoun County', 'Prince William County', 'Virginia Beach']
```
We will create a table to see how reported aggravated assaults have changed over time in each of the groups. Aggravated assault is a relatively common crime, so it is a good indicator of overall trends. We will take the average rate within each group in order to chart assaults over time.
# Columns 0, 12, 24, ... are the 'actual' aggravated-assault rates for each of the 25 years
lbl_agg_means=pd.concat([ori_data_final.iloc[lbl_lst,range(0,300,12)].mean(axis=0) for lbl_lst in lbl_lists], axis=1)
# Drop the 'actual' and 'offense' index levels, leaving only data_year
lbl_agg_means=lbl_agg_means.reset_index(level=0, drop=True).reset_index(level=1, drop=True)
names=[', '.join([drop_last2words(ori_guide.loc[ori_data_final.index[x], 'agency_name']) for x in lbl_lst]) for lbl_lst in lbl_lists]
lbl_agg_means.columns=pd.Series(names, name='Names')
lbl_agg_means=lbl_agg_means.sort_values(by=2019,axis=1, ascending=False)
lbl_agg_means.tail()
(Column headers are abbreviated to the first few jurisdictions in each group; group numbers match the listing above.)

| data_year | Group 4: Washington, Miami, Atlanta, … | Group 1: Anchorage, Aurora, … | Group 0: Oakland, Los Angeles, … | Group 8: Maricopa County, Fresno, … | Group 5: Chicago, New York City | Group 2: Tucson, Jacksonville, … | Group 6: Mobile, Phoenix, … | Group 9: Pima County, St. Louis County, … | Group 3: Anaheim, Connecticut, … | Group 7: Chandler, Mesa, … | Group 10: Honolulu, Fairfax County, … |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2015 | 571.717959 | 393.266418 | 449.313171 | 477.760196 | 418.545135 | 413.829032 | 275.691961 | 254.423586 | 255.105296 | 159.508745 | 89.108101 |
| 2016 | 576.546067 | 440.744184 | 505.449469 | 510.621870 | 470.096336 | 415.833981 | 298.168902 | 289.604566 | 244.112695 | 152.259463 | 95.345060 |
| 2017 | 582.943338 | 462.734692 | 508.950780 | 515.340685 | 456.909390 | 409.543881 | 305.852000 | 292.641386 | 240.638075 | 151.676566 | 91.640026 |
| 2018 | 573.449015 | 538.878583 | 518.672923 | 515.485957 | 456.295173 | 414.776686 | 310.023083 | 288.731725 | 229.402178 | 148.647801 | 96.132487 |
| 2019 | 569.364299 | 545.961469 | 503.072062 | 498.259817 | 463.513510 | 417.348757 | 316.506941 | 308.164064 | 233.542084 | 151.742185 | 86.801107 |
Now we will use Bokeh to create a chart. We immediately see one group (brown) where assaults were highest in the late 1990s but have fallen by about half over the past 25 years. We also see another group (bright red) where this crime started low but increased.
from bokeh.models import ColumnDataSource, Legend
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook
output_notebook()
color_list=['brown','red', 'darkviolet', 'orange','yellow','olive','darkgreen','magenta','cyan','blue','black','gray']
source = ColumnDataSource(lbl_agg_means)
p = figure(plot_width=1200, plot_height=400, title='Assaults per 100,000 residents', tools=[])
for c,lbl in enumerate(lbl_agg_means.columns):
    p.line(x='data_year', y=lbl, source=source, line_color=color_list[c])
show(p)
Now we'll create an interactive map to show these jurisdictions, using the population and geographic data from the ORI guide, which comes from the Department of Justice's National Justice Information System. Some locations give their coordinates as whole-number latitude and longitude, without minutes or seconds, so they may appear slightly off on the map.
from math import sqrt
map_viz=ori_guide.loc[ori_data_final.index, ['agency_name', 'agency_type_name', 'icpsr_lat', 'icpsr_lng', 'population']]
map_viz=pd.concat([map_viz, pd.Series(clustering.labels_, index=ori_data_final.index, name='group')], axis=1)
map_viz['color']=map_viz['group'].apply(lambda c:color_list[c])
map_viz['radius']=map_viz['population'].apply(lambda x:sqrt(x)/1000)
map_viz['desc']=map_viz['agency_name'].apply(drop_last2words)
map_viz.head()
| ori | agency_name | agency_type_name | icpsr_lat | icpsr_lng | population | group | color | radius | desc |
|---|---|---|---|---|---|---|---|---|---|
| AK0010100 | Anchorage Police Department | City | 61.174250 | -149.284329 | 301239.0 | 1 | red | 0.548852 | Anchorage |
| AL0020100 | Mobile Police Department | City | 30.684572 | -88.196568 | 250346.0 | 6 | darkgreen | 0.500346 | Mobile |
| AZ0070000 | Maricopa County Sheriff's Office | County | 33.346541 | -112.495534 | 395937.0 | 8 | cyan | 0.629235 | Maricopa County |
| AZ0070500 | Chandler Police Department | City | 33.346541 | -112.495534 | 258875.0 | 7 | magenta | 0.508798 | Chandler |
| AZ0071700 | Mesa Police Department | City | 33.346541 | -112.495534 | 471034.0 | 7 | magenta | 0.686319 | Mesa |
Using Bokeh, we create an interactive map where each jurisdiction is represented by a circle. The area of each circle is proportional to the population (its radius is proportional to the square root of the population), and the color of the outline shows which group it belongs to. You can hover over a circle to see the jurisdiction and its population. The background map is taken from Google Maps.
output_notebook()
color_list=['brown','red', 'darkviolet', 'orange','yellow','olive','darkgreen','magenta','cyan','blue','black','gray']
source = ColumnDataSource(map_viz)
TOOLTIPS=[('Agency:','@desc'),('Population', '@population')]
q = figure(plot_width=1200, plot_height=800, title='Crime patterns', y_range=(20,70), tooltips=TOOLTIPS)
q.image_url(url=['https://raw.githubusercontent.com/ARMargolis/UCRanalysis/main/Map_United_States.png'], x=-170, y=88, w=108, h=70)
q.circle(x='icpsr_lng', y='icpsr_lat', source=source, fill_color=None, line_color='color', line_width=2, radius='radius')
q.axis.visible=False
show(q)
The stark contrast between the large cities, which saw decreases in crime over 25 years, and the smaller cities and less dense jurisdictions, which have seen an increase, is apparent from the graph and map above. Further research using more jurisdictions may shed even more light. The use of TensorFlow for Mean Shift Analysis allows further scaling at speed.