# Crime Data - Code #Baltimore Crime Data (n=999) # changed dates in excel to Unix time using =(A2-DATE(1970,1,1))*86400 baltimore <- read.xlsx("Baltimore.xlsx", header = TRUE, sheetName = "Sheet1") baltimorecrimetime <- baltimore$time #baltimorecrimetime<-as.numeric(baltimorecrimetime) baltimorecrimetime<-sort(baltimorecrimetime, decreasing = FALSE) baltimorecrimetimedate<-as.POSIXct(baltimorecrimetime, origin="1970-01-01") # time stored as number of seconds since 00:00 01/01/1970 # this converts from Unix Time to Date and Time baltimorecrimetime sub=1579670000 baltimorecrimetime<-baltimorecrimetime-sub baltimorecrimetime div=4000 baltimorecrimetime<-baltimorecrimetime/div baltimorecrimetime # plot in Date Format n=length(baltimorecrimetimedate) y=c(1:n) plot(baltimorecrimetimedate, y, type="l", main="Baltimore Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") # plot in adjusted Unix Time n=length(baltimorecrimetime) y=c(1:n) plot(baltimorecrimetime, y, type="l", main="Baltimore Crime Data", ylab="Crimes (Total)", xlab="Time (adjusted Unix format)") # MLE MLE <- nlm(loglik, c(2,2,2), hessian = TRUE, arrivals = baltimorecrimetime) MLE paste( c("alpha", "beta", "lambda"), round(MLE$estimate,2), sep=" = ") # ansers: alpha=0.6595801 beta=1.0729113 lambda=1.6801595 # Replicate using these Estimates alphaest<-MLE$estimate[1] betaest<-MLE$estimate[2] lambdaest<-MLE$estimate[3] # Branching Ratio Branch=alphaest/betaest Branch # baltimorecrimetimerep <- multsim(alphaest, betaest, lambdaest, n) y=c(1:n) plot(baltimorecrimetimerep, y, type="l", main="Baltimore Crime Data", ylab="Crimes (Total)", xlab="Time (adjusted Unix format)") # plot real data and rep data plot(baltimorecrimetime, y, type="l", main="Actual vs Simulated Baltimore Crime Data", ylab="Crimes (Total)", xlab="Time (adjusted Unix format)") lines(baltimorecrimetimerep, y, col="blue") legend("topleft",legend=c("Actual Baltimore Crime Data","Simulated Baltimore Crime Data"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) # Translate to date format baltimorecrimetimerepdate<-baltimorecrimetimerep*div baltimorecrimetimerepdate<-baltimorecrimetimerepdate+sub baltimorecrimetimerepdate<-as.POSIXct(baltimorecrimetimerepdate, origin="1970-01-01") # Plot the same in Date Format plot(baltimorecrimetimedate, y, type="l", main="Actual vs Simulated Baltimore Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") lines(baltimorecrimetimerepdate, y, col="blue") legend("topleft",legend=c("Actual Baltimore Crime Data","Simulated Baltimore Crime Data"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) ################################## #### joeys method - Baltimore #### ################################## baltimorecrimetime <- baltimore$time #baltimorecrimetime<-as.numeric(baltimorecrimetime) baltimorecrimetime<-sort(baltimorecrimetime, decreasing = FALSE) baltimorecrimetimedate<-as.POSIXct(baltimorecrimetime, origin="1970-01-01") # time stored as number of seconds since 00:00 01/01/1970 # this converts from Unix Time to Date and Time baltimorecrimetime sub=1579670000 baltimorecrimetime<-baltimorecrimetime-sub baltimorecrimetime div=4000 baltimorecrimetime<-baltimorecrimetime/div baltimorecrimetime baltimorecrimetime1 = baltimorecrimetime[1:298] baltimorecrimetime1 # reduced to 160 values n = length(baltimorecrimetime1) y=c(1:n) plot(baltimorecrimetime1, y, type="l", main="Baltimore Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") # want the predict the window 55 to 62 # element 220 to 275 T=baltimorecrimetime1[220] # T is the time of the last observed arrival tao=baltimorecrimetime1[1:220] # tao is the vector of arrival times observed omega=baltimorecrimetime1[280] # omega is the time at which i will predict how many arrivals will have come in total PCSend<-PCS(lambdaest,betaest,alphaest,T,tao,omega) PCSend # starting at the 220th arrival predicting out to the actual 280th arrival # expecting m = 280 as a predicted cascade size # got 280.0757 # extremely close # Plot the same in Date Format plot(baltimorecrimetime1, y, type="l", main="Actual vs Simulated Baltimore Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") lines(baltimorecrimetime1, y, col="blue") legend("topleft",legend=c("Actual Baltimore Crime Data","Simulated Baltimore Crime Data"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) #plot this plot(baltimorecrimetime1, y, type="l", main="Hawkes Process Simulation with Predicted Cascade Size (n = 500)", ylab="Arrivals", xlab="Time") PCSline = c(220,PCSend) PCSy=c(T,omega) lines(PCSy, PCSline, col="blue") legend("topleft",legend=c("Original Simulation","Predicted Cascade Size (Estimated at the bigining of a cluster)"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) baltimorecrimetime1<-baltimorecrimetime1*div baltimorecrimetime1<-baltimorecrimetime1+sub baltimorecrimetime1<-as.POSIXct(baltimorecrimetime1, origin="1970-01-01") PCSline = c(220,PCSend) omega=baltimorecrimetime1[280] T=baltimorecrimetime1[220] PCSy=c(T,omega) plot(baltimorecrimetime1, y, type="l", main="Baltimore Crime Data (4 days)", ylab="Arrivals", xlab="Time") lines(PCSy, PCSline, col="blue") legend("topleft",legend=c("Actual Baltimore Crime Data","Predicted Cascade Size (Estimated from cluster beginning)"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) # Kansas Crime Data (n=1000) # changed dates in excel to Unix time using =(A2-DATE(1970,1,1))*86400 kansas <- read.xlsx("Kansas2011small.xlsx", header = TRUE, sheetName = "Sheet2") kansastime <- kansas$unix #kansastime<-sort(kansastime, decreasing = FALSE) kansastimedate<-as.POSIXct(kansastime, origin="2011-01-01") plot(kansastimedate, y, type="l", main="Kansas Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") # this converts from Unix Time to Date and Time #kansastime sub=1000 kansastime<-kansastime-sub div=8500000 # I've found that changing the div here performs the same # task as changing the initial guess in a round-about way kansastime<-kansastime/div kansastime # plot in Date Format n=length(kansastimedate) y=c(1:n) plot(kansastimedate, y, type="l", main="Kansas Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") # plot in adjusted Unix Time n=length(kansastime) y=c(1:n) plot(kansastime, y, type="l", main="Kansas Crime Data", ylab="Crimes (Total)", xlab="Time (adjusted Unix format)") # MLE MLE <- nlm(loglik, c(2,2,2), hessian = TRUE, arrivals = kansastime) MLE paste( c("alpha", "beta", "lambda"), round(MLE$estimate,2), sep=" = ") # Replicate using these Estimates alphaest<-MLE$estimate[1] betaest<-MLE$estimate[2] lambdaest<-MLE$estimate[3] # Branching Ratio Branch=alphaest/betaest Branch # kansastimerep <- multsim(alphaest, betaest, lambdaest, n) y=c(1:n) plot(kansastimerep, y, type="l", main="Kansas Crime Data", ylab="Crimes (Total)", xlab="Time (adjusted Unix format)") # plot real data and rep data plot(kansastime, y, type="l", main="Actual vs Simulated Kansas Crime Data", ylab="Crimes (Total)", xlab="Time (adjusted Unix format)") lines(kansastimerep, y, col="blue") legend("topleft",legend=c("Actual Kansas Crime Data","Simulated Kansas Crime Data"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) # Translate to date format kansastimerepdate<-kansastimerep*div kansastimerepdate<-kansastimerepdate+sub kansastimerepdate<-as.POSIXct(kansastimerepdate, origin="2011-01-01") # Plot the same in Date Format plot(kansastimedate, y, type="l", main="Actual vs Simulated Kansas Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") lines(kansastimerepdate, y, col="blue") legend("topleft",legend=c("Actual Kansas Crime Data","Simulated Kansas Crime Data"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) ################################## #### joeys method - Kansas #### ################################## kansas <- read.xlsx("Kansas2011small.xlsx", header = TRUE, sheetName = "Sheet2") kansastime <- kansas$unix #kansastime<-sort(kansastime, decreasing = FALSE) kansastimedate<-as.POSIXct(kansastime, origin="2011-01-01") n=length(kansastimedate) y=c(1:n) plot(kansastimedate, y, type="l", main="Kansas Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") # this converts from Unix Time to Date and Time #kansastime sub=1000 kansastime<-kansastime-sub div=8500000 # I've found that changing the div here performs the same task as changing the initial guess in a round-about way kansastime<-kansastime/div # plot in Date Format kansastime1 = kansastime kansastime1 kansastime2 = kansastime1[400:650] n = length(kansastime2) y = c(1:n) plot(kansastime2, y, type="l", main="Kansas Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") T=kansastime[450] # T is the time of the last observed arrival tao=kansastime[1:450] # tao is the vector of arrival times observed omega=kansastime[600] # omega is the time at which i will predict how many arrivals will have come in total PCSend<-PCS(lambdaest,betaest,alphaest,T,tao,omega) PCSend # starting at the 450th arrival predicting out to the actual 280th arrival # expecting m = 600 as a predicted cascade size # got 600.0063 # extreeeeeeeemely close n=length(kansastime) y=c(1:n) # Plot the same in Date Format plot(kansastime, y, type="l", main="Actual vs Simulated Baltimore Crime Data", ylab="Crimes (Total)", xlab="Time (Date Format)") lines(kansastime, y, col="blue") legend("topleft",legend=c("Actual Baltimore Crime Data","Simulated Baltimore Crime Data"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) #plot this plot(kansastime, y, type="l", main="Hawkes Process Simulation with Predicted Cascade Size (n = 500)", ylab="Arrivals", xlab="Time") PCSline = c(450,600.0063) PCSy=c(T,omega) lines(PCSy, PCSline, col="blue") legend("topleft",legend=c("Original Simulation","Predicted Cascade Size (Estimated at the biginning of a cluster)"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) kansastime<-kansastime*div kansastime<-kansastime+sub kansastime<-as.POSIXct(kansastime, origin="2011-01-01") T=kansastimedate[450] # T is the time of the last observed arrival tao=kansastimedate[1:450] # tao is the vector of arrival times observed omega=kansastimedate[600] n=length(kansastime) y=c(1:n) plot(kansastimedate, y, type="l", main="Kansas Crime Data (4 days)", ylab="Arrivals", xlab="Time") PCSline = c(450,600.0063) PCSy=c(T,omega) lines(PCSy, PCSline, col="blue") legend("topleft",legend=c("Actual Kansas Crime Data","Predicted Cascade Size (Estimated from cluster beginning)"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue"))