# S&P 500 - Code # Time Series of S&P 500 Volatile Dates # a volatile date in this case is a date where (|open-close|)/close > 1 # i.e. movement of greater than 1% in a day # Mention in the report that i made sure that the number of events were > 1000 as, based on "n Accuracy" # the accuracy of estimates is greatly improved above n = 1000 and doesnt get much bettter after that # Run Packages library(xlsx) SP <- read.xlsx("SandP.xlsx", header = TRUE, sheetName = "Sheet1") SP <- SP$days # translate to date format SPdate<-as.POSIXct(SP*86400, origin="1985-01-01") n=length(SPdate) y=c(1:n) plot(SPdate, y, type="l", main="S&P 500 Volatile Dates (1985-2020)", ylab="Volatile Dates (Total)", xlab="Year") # so that i can find nicer estimates SP1=SP/100 MLE <- nlm(loglik, c(2,2,2), hessian = TRUE, arrivals = SP1) MLE paste( c("alpha", "beta", "lambda"), round(MLE$estimate,2), sep=" = ") # Replicate using these Estimates alphaest<-MLE$estimate[1] betaest<-MLE$estimate[2] lambdaest<-MLE$estimate[3] # Branching Ratio Branch=alphaest/betaest Branch # The branching ratio is very high, 84.6% of all volitile days are driven by endogenous feedback #mechanisms in the market and less than 16% of instances of volatile trade days are driven by exogenous events. # # simulate to replicate SPrep1 <- multsim(alphaest, betaest, lambdaest, n) #exclude dates after the year 2020 SPrep1 = SPrep1[ SPrep1 < 126.94 ] SPrep1=SPrep1*100 #translate back to real days #voldatesrep=voldatesrep+1809 y=c(1:length(SPrep1)) SPrep1date<-as.POSIXct(SPrep1*86400, origin="1985-01-01") # plot rep alone plot(SPrep1date, y, type="l", main="S&P 500 Volatile Dates (1985-2020)", ylab="Volatile Dates (Total)", xlab="Year") #plot both together y=c(1:n) plot(SPdate, y, type="l", main="S&P 500 Volatile Dates (1985-2020)", ylab="Volatile Dates (Total)", xlab="Year") y=c(1:length(SPrep1date)) lines(SPrep1date,y, col="blue") legend("topleft",legend=c("Volitile Dates Counting Process (actual)","Volitile Dates Counting Process (resimulation)"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) ####### plot (resimulation multiple times) ##### # sim 1 SPrep1 <- simulate_hawkes(alphaest, betaest, lambdaest, n) SPrep1 = SPrep1*100 #translate back to real days SPrep1date<-as.POSIXct(SPrep1*86400, origin="1985-01-01") # sim 2 SPrep2 <- simulate_hawkes(alphaest, betaest, lambdaest, n) SPrep2 = SPrep2*100 #translate back to real days SPrep2date<-as.POSIXct(SPrep2*86400, origin="1985-01-01") # sim 3 SPrep3 <- simulate_hawkes(alphaest, betaest, lambdaest, n) SPrep3 = SPrep3*100 #translate back to real days SPrep3date<-as.POSIXct(SPrep3*86400, origin="1985-01-01") # sim 4 SPrep4 <- simulate_hawkes(alphaest, betaest, lambdaest, n) SPrep4 = SPrep4*100 #translate back to real days SPrep4date<-as.POSIXct(SPrep4*86400, origin="1985-01-01") # sim 5 SPrep5 <- simulate_hawkes(alphaest, betaest, lambdaest, n) SPrep5 = SPrep5*100 #translate back to real days SPrep5date<-as.POSIXct(SPrep5*86400, origin="1985-01-01") # sim 6 SPrep6 <- simulate_hawkes(alphaest, betaest, lambdaest, n) SPrep6 = SPrep6*100 #translate back to real days SPrep6date<-as.POSIXct(SPrep6*86400, origin="1985-01-01") #plot all together y=c(1:n) plot(SPdate, y, type="l", main="S&P 500 Volatile Dates (1985-2020)", ylab="Volatile Dates (Total)", xlab="Year") lines(SPrep1date,y, col="green") lines(SPrep2date,y, col="green") lines(SPrep3date,y, col="green") lines(SPrep4date,y, col="green") lines(SPrep5date,y, col="green") lines(SPrep6date,y, col="green") legend("bottomright",legend=c("Volitile Dates Counting Process (actual)","Volitile Dates Counting Process (resimulations)"), text.col=c("black","green"),pch=c(16,15),col=c("black","green")) # now find the average simulation realisation avgsim = c() for (i in y){ avgsim[i] = (SPrep1[i]+SPrep2[i]+SPrep3[i]+SPrep4[i]+SPrep5[i]+SPrep6[i])/6 } avgsim avgsimdate<-as.POSIXct(avgsim*86400, origin="1985-01-01") plot(SPdate, y, type="l", main="S&P 500 Volatile Dates (1985-2020)", ylab="Volatile Dates (Total)", xlab="Year") lines(avgsimdate,y, col="blue") legend("topleft",legend=c("Volitile Dates Counting Process (actual)","Volitile Dates Counting Process (average resimulation)"), text.col=c("black","blue"),pch=c(16,15),col=c("black","blue")) # This comes out perfectly sometimes #################################### Joeys paper method PCS ############################## alpha=alphaest beta=betaest lambda=lambdaest T=SP1[1300] tao = SP1[1:1300] omega=SP1[2054] PCSend=PCS(lambda,beta,alpha,T,tao,omega) PCSend # expecting m = 2054 as a predicted cascade size # got 2076.926 # reasonably close #plot the predicted over the actual yold=c(1:n) plot(SPdate, yold, type="l", main="S&P 500 Volatile Dates (1985-2020)", ylab="Arrivals", xlab="Time") # T=SPdate[1300] tao = SPdate[1:1300] omega=SPdate[2054] PCSline = c(1300,PCSend) PCSy=c(T,omega) lines(PCSy, PCSline, col="red") # legend("topleft",legend=c("Original Count","Predicted Cascade Size (Estimated after arrival 1300)"), text.col=c("black","red"),pch=c(16,15),col=c("black","red")) ############################ predict the future (joeys paper) ###################### alpha=alphaest beta=betaest lambda=lambdaest T=SP1[2054] tao = SP1[1:2054] omega=200 # arrivals <- PCS(lambda,beta,alpha,T,tao,omega) arrivals # 3240.847 ############## Plot PCS ##################### # yvec=c(2054,arrivals) xvec=c(12694,omega*100) y1 <- as.POSIXct(xvec*86400, origin="1985-01-01") ## yold=c(1:n) plot(SPdate, yold, type="l", main="S&P 500 Volatile Dates (1985-2020)", ylab="Arrivals", xlab="Time", ylim=c(0,3500), xlim=c(SPdate[1], y1[2])) #, xlim=c(1985-01-02 UTC,2039-10-05 UTC) lines(y1, yvec , col="red", type="l") legend("topleft",legend=c("Original Count","Predicted Cascade Size"), text.col=c("black","red"),pch=c(16,15),col=c("black","red")) ############################ predict the future (joeys paper) ###################### end ########## ############################ predict the future (resimulation & Joey's method) ###################### SPrep2 <- simulate_hawkes(alphaest, betaest, lambdaest, n) SPrep3 = SPrep2+126.95 SPrep3 = SPrep3*100 SPrep3date<-as.POSIXct(SPrep3*86400, origin="1985-01-01") y3=c(2055:(length(SPrep3)+2054)) plot(SPdate, yold, type="l", main="S&P 500 Volatile Dates (1985-2020)", ylab="Arrivals", xlab="Time", ylim=c(0,4050), xlim=c(SPdate[1],SPrep3date[2054])) #, xlim=c(1985-01-02 UTC,2039-10-05 UTC) lines(SPrep3date, y3 , col="red", type="l") # resimulation # now plot joeys method on top T=SP1[2054] tao = SP1[1:2054] omega=255 arrivals <- PCS(lambda,beta,alpha,T,tao,omega) arrivals yvec=c(2054,arrivals) xvec=c(12694,omega*100) y1 <- as.POSIXct(xvec*86400, origin="1985-01-01") lines(y1, yvec , col="green", type="l") # joeys method # legend legend("topleft",legend=c("Original Count","Predicted Cascade Size (resimulation)", "Predicted Cascade Size (m)"), text.col=c("black","red","green"),pch=c(16,15),col=c("black","red","green")) ####################################################################################