########################################################################## ##### Functions for Performing Threshold Gradient Descent Procedure ##### For the Cox model (Gui and Li (2005): PSB ##### Both R and Splus can be used to run these functions. ########################################################################## ############################TGDCoxf.R##################################### ########################################################################## ## This function calculates the cross-validated partial likelihood ## score for a given "thres" and for each step from 1 to maxstep. ## This CVPl scores are used to determine the step to stop the iterations. ## This function calls the function "gd". ## ## For a given threshold (between 0 and 1), Users should plot ## cvplscore vs 1:maxstep to determine a step to stop the iteractions. ## ## For a given step determined by the CVPL, run gd again with this step to ## obtain "gra" and run the function "findbeta" (need gra, threshold value, ## epsilon and the step number) to obtain final estimate of beta. ########################################################################## "gdcvpl" <- function(x, time, surv.st, thres, epi, maxstep, nfold) { ### x: n*m predictor matrix, with m predictors. ### time: survival time for n observations. ### surv.st: censoring status for n observations. ### thres: threshold value between 0 and 1 ### epi: maximum factor (step size) scaling gradient for incrementing selected coefficients at each step ### maxstep: maximum number of threshold gradient descent iterations ### nfold: number of cross-validation iterations ### program outputs the cross-validated partial likelihood for given epi and ### thres. epi=episilon used, cvpl=cvplscore from step 1 t0 maxstep, ### op= step # where the max CVPL is achieved. Library(survival) nm <- dim(x) n <- nm[1] m <- nm[2] k <- round(n/nfold) cvpllymgd <- rep(0, (maxstep)) for(i in 1:(nfold-1)) { x1 <- x[ - c(((i - 1) * k + 1):(i * k)), ] x2 <- x[c(((i - 1) * k + 1):(i * k)), ] gralym <- gd(x1, x2, time[c(((i - 1) * k + 1):(i * k))], surv.st[c(((i - 1) *k + 1):(i * k))], time[ - c(((i - 1) *k + 1):(i * k))], surv.st[ - c(((i - 1) * k + 1):(i * k))], thres, epi, maxstep) cvpllymgd <- cvpllymgd + gralym$cvpl } i <- nfold x1 <- x[ 1:((i-1)*k) , ] x2 <- x[ ((i-1)*k+1): n, ] gralym <- gd(x1, x2, time[((i-1)*k+1): n], surv.st[((i-1)*k+1): n], time[1:((i-1)*k)], surv.st[1:((i-1)*k)], thres, epi, maxstep) cvpllymgd <- cvpllymgd + length(surv.st[((i-1)*k+1): n])*gralym$cvpl/k cvplscore <- k*cvpllymgd/nm[1] cvplp <- (1: maxstep)[cvplscore==min(cvplscore)] list(epi=epi, cvpl=cvplscore, op=cvplp) } ########################################################################## # For a given threshold value $thres$, for each step (1 to maxstep), # this function computes the gradient for training score and scores for #testing data set. Function "gdcvpl" call this function for #cross-validation analysis to determine which step to stop the iterations. # This function calls "lik1". ########################################################################## "gd" <- function(x, pred, predtime, predsurv, time, surv.st, thres, epi, maxstep) { ### x: n*m predictor matrix, with m predictors. ### time: survival time for n observations. ### surv.st: censoring status for n observations. ### x, time, surv.st together are training dataset ### pred, predtime, predsurv together are corresponding testing dataset ### thres: threshold value between 0 and 1 ### epi: maximum factor (step size) scaling gradient for incrementing ### selected coefficients at each step ### maxstep: maximum number of threshold gradient descent iterations ### program outputs: ### gra: n*maxstep, is the gradient for the training score (x%*%beta) in ### each steps. ### pred: n1*maxstep, is the score(pred%*%beta) for testing dataset in each ### steps. ### lik: testing data's partial likelihood in each steps. ### cvpl: will be used to calculate the cross-validated partial likelihood ### in "gdcvpl".  x <- as.matrix(x)  nm <- dim(x)  n <- nm[1]  m <- nm[2]  nm1 <- dim(as.matrix(pred))  n1 <- nm1[1]  m1 <- nm1[2]  one <- rep(1, n)  meanx <- drop(one %*% x)/n  meanx <- rep(0, m)  x <- scale(x, meanx, FALSE)  # centers x  normx <- sqrt(drop(one %*% (x^2)))  names(normx) <- NULL  normx <- rep(1, m)  x <- scale(x, FALSE, normx)  # scales x  r <- rank(time)  # need to define time  mu <- matrix(0, n, maxstep)  ita1 <- matrix(0, n1, maxstep)  beta <- beta1 <- rep(0, m)  beta <- as.matrix(beta)  predlik <- 0  cvpl <- 0  f <- 1  while(f <= maxstep) {   beta <- beta1   if(n1 == 1)    ita1[, f] <- sum(pred * beta)   else ita1[, f] <- t(t(beta)%*%t(pred))   predlik[f + 1] <- lik1(ita1[, f], predtime,    predsurv)   ita <- t(t(beta)%*%t(x))   cvpl[f] <- (lik1(ita, time, surv.st) - lik1(c(ita, ita1[, f]), c(time, predtime), c(surv.st, predsurv)))/length(predtime)   epita <- exp(ita)   d <- rep(0, n)   dono <- rep(0, n)   for(i in 1:n) {    d[i] <- sum(surv.st[r == r[i]])    dono[i] <- sum(epita[r >= r[i]])   }   risk <- d/dono   culrisk <- rep(0, n)   for(i in 1:n) {    culrisk[i] <- sum(unique(risk[r <= r[i]]))   }   mu[, f] <- surv.st - epita * culrisk   gradient <- t(x) %*% mu[, f]   gra1 <- gradient   gra1[abs(gradient) < thres * max(abs(gradient))] <- 0   beta1 <- beta + epi * gra1   f <- f + 1  }  list(gra = mu, pred = ita1, lik = predlik[-1], cvpl = cvpl) } ########################################################################## ##After CVPL determines the "stepno" and output from "gd", ##this function calculate the $\beta$ values. ########################################################################## "findbeta" <- function(x, gra, thres, epi, stepno) { ### x: n*m predictor matrix, with m predictors. ### gra: output from "gd", gradient for x%*%beta in each steps ### thres: threshold value between 0 and 1 ### epi: maximum factor (step size) scaling gradient ### for incrementing selected coefficients at each step ### note that thres and epi should be same as what is in "gd" ### program outputs the estimated beta in step "stepno". nm <- dim(x) n <- nm[1] m <- nm[2] beta <- rep(0, m) beta1 <- beta gra1 <- t(x) %*% gra for(i in 1:stepno) { beta1 <- gra1[, i] beta1[abs(gra1[, i]) < thres * max(abs(gra1[, i]))] <- 0 beta <- beta + beta1 * epi } return(beta) } ########################################################################## ## utility function to return the partial likelihood for Cox model. ########################################################################## "lik1" <- function(score, time, surv.st) { ### return the partial likelihood for Cox model n <- length(score) r <- rank(time) ita <- score epita <- exp(score) d <- rep(0, n) dono <- rep(0, n) for(i in 1:n) { d[i] <- sum(surv.st[r == r[i]]) dono[i] <- sum(epita[r >= r[i]]) } lik <- sum((ita - log(dono)) * surv.st) return(lik) }