
# Download a reasonably current list of S&P 500 constituent symbols.
# The file is read as whitespace-separated character tokens.
sp500.symbol.url <-
  "http://blog.quanttrader.org/wp-content/uploads/sp500.csv"
sp500.symbols <- scan(file = url(sp500.symbol.url), what = character())
# Echo the address the symbols were read from (shown when run interactively).
sp500.symbol.url

# get functions used to read in data
# NOTE(review): this executes remote code fetched over plain http; consider
# reviewing the file and sourcing a vetted local copy instead.
source('http://www.portfolioprobe.com/R/pprobe_functions01.R')
# library() stops with an error if TTR is not installed, whereas require()
# would only warn and return FALSE, letting the script fail later on.
library(TTR)
sp500.close <- pp.TTR.multsymbol(sp500.symbols, 20070101, 20110715)
# keep only the columns (symbols) that contain no missing values
sp500.closeok <- sp500.close[, colSums(is.na(sp500.close)) == 0]

# create return object (class xts): differenced log prices, first row dropped
sp500.ret <- diff(log(sp500.closeok))[-1, ]

# visually check data -- always a good idea
plot(1:4) # just something so graphics device exists so next command works
# Ask before drawing each new page so every series can be inspected.
# par() returns the previous settings; they are restored after the loop so
# that later plots in this script are not left prompting as well.
old.par <- par(ask = TRUE)
for (i in colnames(sp500.closeok)) {
  plot(sp500.closeok[, i], main = i, type = "l")
}
par(old.par)

# spotted a flat spot at end of data

# find all occurrences: sum the absolute returns over the last rows of each
# column and sort ascending, so flat series appear first
sort(colSums(abs(tail(sp500.ret))))

# only 'Q' looks bad, remove it
match("Q", colnames(sp500.ret)) # show which column holds 'Q'
sp500.retorig <- sp500.ret
# Drop the column by name rather than by a hard-coded position: the position
# changes whenever the downloaded symbol list or the NA filtering changes.
# If 'Q' is absent, all columns are kept.
sp500.ret <- sp500.retorig[, colnames(sp500.retorig) != "Q"]

# Fetch the index itself (^GSPC) over the same date range, keep only the
# 'Close' column, and turn it into differenced log prices like the
# constituents (first row dropped).
spxclose <- getYahooData("^GSPC", 20070101, 20110715)[, "Close"]
spxret <- diff(log(spxclose))[-1, ]
# Index returns go in the first column, constituent returns after it.
spall.ret <- cbind(spxret, sp500.ret)

# Write pp.const.ind.cor interactively (fix() opens it in an editor; the
# function body is not part of this script), then apply it to the combined
# index + constituent returns as a plain matrix.
fix(pp.const.ind.cor)
spconind50 <- pp.const.ind.cor(as.matrix(spall.ret))

# Load pp.timeplot and plot the row means of the result.
source("http://www.portfolioprobe.com/R/blog/pp.timeplot.R")
pp.timeplot(rowMeans(spconind50))

# Second function (for intra-constituent correlation): start from a copy of
# pp.const.ind.cor and edit that copy interactively.
pp.mean.const.cor <- pp.const.ind.cor
fix(pp.mean.const.cor)
spconcor50 <- pp.mean.const.cor(as.matrix(spall.ret))

# do bootstrap over constituents
# Each of the 1000 columns holds the row means of spconind50 taken over a
# with-replacement resample of its columns. Dimensions are derived from
# spconind50 instead of being hard-coded, so the code keeps working when the
# number of dates or constituents changes.
# NOTE(review): assumes the original constant 477 was ncol(spconind50) --
# confirm against pp.const.ind.cor.
n.const <- ncol(spconind50)
spconind50.bootcon <- matrix(
  NA, nrow(spconind50), 1000,
  dimnames = list(rownames(spconind50), NULL)
)
for (i in seq_len(ncol(spconind50.bootcon))) {
  tsam <- sample(n.const, n.const, replace = TRUE)
  # drop = FALSE keeps a matrix even if only one column is selected
  spconind50.bootcon[, i] <- rowMeans(spconind50[, tsam, drop = FALSE])
}

# Write the date-bootstrap function interactively (fix() opens an editor; the
# function body is not part of this script).
fix(pp.boot.ind.cor)

# Value of the actual final correlation.
# NOTE(review): `sptail` is not created anywhere in the visible script; it
# must exist in the workspace before these calls -- confirm how it is built.
pp.boot.ind.cor(sptail, FALSE, trials = 5)

# Bootstrap values, then a density plot of them.
spboot.date <- pp.boot.ind.cor(sptail)
plot(density(spboot.date))

