##############################################################################
# simulated data set to test the programs
#Data Simulation with 10 important predictor out of 500
# sample size of 400 individuals. first 200 as training data, 
# next 200 as testing data. 
##############################################################################

set.seed(39)

im <- 10
beta <- runif(im , min=0.1, max=0.2)
betasign <- runif(im, -1, 1)
beta <- beta*sign(betasign)


x<-matrix(0,400,500)
for (i in 1:400)  x[i,] <- rnorm(500)

tl <- length(beta)

ita<-x%*%c(beta, rep(0, 500-tl))

Y<-matrix(0,400,2)
aa<-2
for (i in 1:400){
	Y[i,1]<-rweibull(1,5,aa/(exp(ita[i])))
}

beta <- 5*beta

Y[,2]<-8*runif(400)
Y1<-matrix(0,400,2)
Y1[,1]<-pmin(Y[,1],Y[,2])
Y1[Y[,1]<Y[,2],2]<-1
sum(Y1[,2])/400

library(survival)
fit<-coxph(Surv(Y1[,1],Y1[,2])~x[,1:20])

## survival data are in Y1, covariate (gene expression) data
## are in x.


############################################################################
#   R functions for running the TGDCoxf.R functions. 
############################################################################

#Initail values for function gdcvpl

maxstep <- 1000
nfold <- 10
optip <- 0
thres <- 1 
epi <- 1e-03



# Search for an optimal value for epi so that the minimun value of the CVPL
# curve falls in the middle


fit <- gdcvpl(x[1:200,], Y1[1:200,1], Y1[1:200,2], thres, epi, maxstep, nfold)

cat("optimal for", 1, "=", fit$op," ", epi, "\n")
while( fit$op < 200 || fit$op > (maxstep - 100) ){
	if (fit$op < 200) {
		epi <- epi/3
		fit <- gdcvpl(x[1:200,], Y1[1:200,1], Y1[1:200,2], thres, epi, maxstep, nfold)
	}
			else {
				epi <- 2*epi
				fit <- gdcvpl(x[1:200,], Y1[1:200,1], Y1[1:200,2], thres, epi, maxstep, nfold)				
			}
		cat("optimal for", 1, "=", fit$op," ", epi, "\n")	
}

optip <- fit$op

# fit the predictive model using the optimal parameter
fitsimu <- gd(x[1:200, ], x[201:400, ], Y1[201:400, 1], Y1[201:400, 2], Y1[1:200, 1], Y1[1:200, 2], thres, epi, optip)	

# recover the estimated beta
coef <- findbeta(x[1:200, ], fitsimu$gra, thres, epi, optip)


# plot the survival cirve for the testing data.
tt <- fitsimu$pred[, optip]
tt1 <- rep(0,200)
tt1[tt>0]<-1
pchi<-survdiff(Surv(Y1[201:400,1],Y1[201:400,2])~tt1)$chisq
p1 <- 1-pchisq(pchi,1)
plot(survfit(Surv(Y1[201:400,1],Y1[201:400,2])~tt1))

# end for test TGDCoxf.R


##############################################################################
# R Codes for Running the PRC function, using the principal components. 
##############################################################################

# Find all non-trivial principal components
svdwork<-prcomp(x[1:200,])
in1<-sum(abs(svdwork$sdev)>0.0000000001)

# Fit PCR to principal components
fitpcrwork<-PCRCoxf(svdwork$x[,1:in1], Y1[1:200, 1] , Y1[1:200, 2], 5)

# Find PCR components with p-value < 0.05
sv1 <- 0
pv11<-rep(0, 5)
for( i in 1:5){
	beta3<-coxph(Surv(Y1[1:200, 1] , Y1[1:200, 2])~fitpcrwork$comp[,i])
	sv1[i]<-sqrt(beta3$wald.test)
	pv11[i]<-2*(1-pnorm(sv1[i]))	
}
in2<-min((1:5)[(pv11>0.05)])

# Fit Cox model with significant components 
beta3<-coxph(Surv(Y1[1:200, 1] , Y1[1:200, 2])~fitpcrwork$comp[,1:(in2-1)])

# Identifiy the coefficinet
coefs1<-svdwork$rotation[,1:in1] %*% fitpcrwork$coef[,1:(in2-1)]%*%beta3$coefficients

# Build Predictive model and plot the survival curve
tt <- x[201:400, ]%*%coefs1 - sum(coefs1*apply(x[1:200, ], 2 , mean))
tt1 <- rep(0,200)
tt1[tt>0]<-1
pchi<-survdiff(Surv(Y1[201:400,1],Y1[201:400,2])~tt1)$chisq
p1 <- 1-pchisq(pchi,1)
plot(survfit(Surv(Y1[201:400,1],Y1[201:400,2])~tt1))


# end of test of PCRCoxf.R


############################################################################
#   R functions for running the L1Coxf.R functions
#   Note this needs to be run in Splus, not in R,
#   This is because the "chol" function in R cannot handle
#   Cholesky decomposition when the matrix is sigular. 
############################################################################

# cross validation analysis for the tuning parameter ranging from 0.1 to 5. 
## 
fit <- L1cvpl(x[1:200, ], Y1[1:200,1], Y1[1:200, 2], (1:50)/10, 3)

### Determine the tuning parameter which results in smallest CV scores. 
### one can also plot the value
ss <- (1:50)[fit == min(fit)]/10 

### For the tuning parameter identified by the CV analysis, 
### output the estimate of the beta coefficients. 

fitsimu <- surv4(x[1:200, ], Y1[1:200,1], Y1[1:200, 2], ss)