#includes the following functions:

#re ... function for calculating the Re measure
#called with
#re(fit)
# works with coxph, survreg and aareg

#summary.re ... function for calculating the Re in time (cumulative and time window)
#called with
#summary(re(fit))


re <- function (fit, ...) UseMethod("re")



re.coxph <- function(fit,Gmat){
	#################################################################################################
	# A function that calculates the Re measure of explained variation of the model
	# Arguments: fit: A Cox model object
	#	     Gmat: a matrix of weights - if omitted, calculated as inverse KM
	# 
	# Returned values:
	#  Re: the Re measure
	#  Re.imp: the Re measure assuming constant coefficients after the last event time
	#  Re.fix: the Re measure asuming a constant value of coefficient throughout the follow-up
	#          time(gives NA in case of counting process type times (start,stop)) 
	#      se: standard error of re
	#    sen0: standard error of re, calculated with expected Q	
	#       C: C index (generalized to allow for time-dependent covariates and weighted to ensure independence of 
	#	   the censoring process)
	#    r2nw: unweighted measure
	#
	# Notes: left truncated data not yet implemented.
	#	 when calculating the re.imp, the data-set should not be split after the last event time, i.e. each
	#	 data line should represent a different subject
	#	
	#################################################################################################
	
	require(survival)
	Y <- fit$y
	f.type <- attr(Y, "type")  
	if (f.type== "right") Y <- cbind(rep(0,nrow(Y)),Y)
	sort.it <- order(Y[,2],-Y[,3]) 
	bx <- fit$linear.predictors[sort.it] - mean(fit$linear.predictors)
	Y <- Y[sort.it,]
	
	n.ind <- nrow(Y)
	ti <- sort(unique(Y[Y[,3]==1,2]))			#ordered unique times of events
	n.times <- length(ti)	 				#number of unique times of events	
	
	
	r2<-NULL
		if(missing(Gmat)){
		 km.inv <- my.survfit(Y[,1],Y[,2],Y[,3])		#now includes time-dependent data
		 G <- km.inv$surv.i2[km.inv$n.event != 0]								
		 dSt <- 1/G						#is this needed (we already have only at event times)
		 Gmat <- matrix(G,byrow=T,ncol=length(G),nrow=n.ind)	#times in columns, ind. in rows
		}
		else if(is.null(dim(Gmat))){
			if(length(Gmat)!=n.times)stop("Wrong length of weights")
			else Gmat <- matrix(Gmat,byrow=T,ncol=length(Gmat),nrow=n.ind)
		}
		else if(any(dim(Gmat)!=c(n.ind,n.times)))stop("Wrong dimension of weights")
		Gmat <- Gmat[sort.it,]					#sort with respect to event times
		
	 			 
		ebxun <- ebx <- exp(bx)
		rg <- meanr <- smin <- newvar1 <- newvar2 <- newvar3 <- eden <- enum <-  rep(NA,n.times)
		for(it in 1:length(ti)){
			k <- sum(Y[Y[,3]==1,2]==ti[it])			#number of ties
						
			inx <- (Y[,2] >= ti[it] & Y[,1]< ti[it])	#at risk
									#reverse the order to get the right ranks
			rangi <- (rank(-bx[inx]) - .5)/Gmat[inx,it]+.5	#ranks for each individual
			
			rg[it] <- sum(rangi[1:k]/(Gmat[inx,it])[1:k])	#rank of the one who had the event (sum of ranks if ties)
			
			r0 <- mean(rangi)
			meanr[it] <-  r0*sum(1/((Gmat[inx,it])[1:k]))	#mean rank at this time 
			#browser()
			rP <- (.5+ .5*sum((Y[inx,2] == ti[it] & Y[inx,1]< ti[it])/Gmat[inx,it]))
			smin[it] <- rP*sum(1/((Gmat[inx,it])[1:k]))	#min rank (1 if no ties)
			
			pi <- ebx[inx]/sum(ebx[inx])			#individual hazard (breslow * ebx)
			newvar1[it] <- sum((rangi-r0)^2*pi/(Gmat[inx,it])^2)	#term1 -  for nom
			newvar2[it] <- sum((rP-r0)^2*pi/(Gmat[inx,it])^2)	#term2 -  for denom
			newvar3[it] <- sum((r0-rangi)*(r0-rP)*pi/(Gmat[inx,it])^2)	#term3 - covariance
			enum[it] <- sum((r0-rangi)*pi/(Gmat[inx,it]))		#expected numerator=Q1
			eden[it] <- sum((r0-rP)*pi/(Gmat[inx,it]))	#expected denominator=Q2
	
	 	}
	 	num <- sum(meanr-rg)
	 	den<- sum(meanr-smin)
	 	eden <- sum(eden)
	 	enum <- sum(enum)
	 	r2 <- num/den	#the Re measure
	 	r2nw <- sum((meanr-rg))/sum((meanr-smin))
	 	newvar <- sqrt(sum(newvar1)/den^2-2*sum(newvar3)*num/den^3+sum(newvar2)*num^2/den^4)
	 	newvare <- sqrt(sum(newvar1)/eden^2-2*sum(newvar3)*enum/eden^3+sum(newvar2)*enum^2/eden^4)

	 	ebx <- sort(ebx)					#this is for Jfixed
	 	Jf <- NA
		if(f.type=="right"){
			nd <- length(ebx)
			imat <- matrix(ebx,nrow=nd,ncol=nd)
			jmat <- matrix(ebx,nrow=nd,ncol=nd,byrow=T)
			iimat <- imat/(imat+jmat)
			jjmat <- jmat/(imat+jmat)
			Jf <- 2*sum((jjmat-iimat)[upper.tri(jjmat)])/(sum(upper.tri(jjmat))*2)
		}
		
		#and now for the part after last event time:
		ebxun <- ebxun[Y[,2]>=max(ti)&Y[,3]==0]
		
		km.inv <- my.survfit(Y[,1],Y[,2],Y[,3])		
		nd <- sum(km.inv$n.cens[km.inv$time>max(ti)])
		if((nd>0)&(!any(km.inv$n.cens[km.inv$time>max(ti)]==0))){		#if there is at least 1 indiv. and no time-splitting
		   
			ebx <- sort(ebxun)
			sumt <- 0
			for(it in 1:nd){
				sumt1 <- sum((ebx[it]-ebx[1:it])/(ebx[it]+ebx[1:it]))
				sumt <- sumt + sumt1
			}
			Jfin <- sumt/2

			Jfin <- (num+Jfin)/(den +  (nd*(nd-1))/4)
		}

		else Jfin <- NA 		
		
 		out <- list(Re=r2,Re.imp=Jfin,Re.fix=Jf,se=newvare,C=(r2+1)/2,sen0=newvar,times=ti,meanr=meanr,ranks=rg,weights=Gmat,perfr=smin,type="coxph",nd=nd,r2nw=r2nw)
 		class(out) <- "re"
	 	out
}

re.aareg <- function(fit,Gmat){
	#################################################################################################
	# A function that calculates the Re measure of explained variation of the model
	# Arguments: fit: An aareg model object
	#	     Gmat: a matrix of weights - if omitted, calculated as inverse KM
	# 
	# Returned values:
	#  Re: the Re measure
	#  
	#      se: standard error of re
	#       C: C index (generalized to allow for time-dependent covariates and weighted to ensure independence of 
	#	   the censoring process)
	# 
	#
	# Notes: left truncated data not yet implemented.
	#	
	#################################################################################################
	
	require(survival)
	if(length(fit$y)==0)stop("The aareg model must be fitted with 'y=TRUE' option")
	if(length(fit$x)==0)stop("The aareg model must be fitted with 'x=TRUE' option")
	Y <- fit$y
	sort.it <- order(Y[,2],-Y[,3]) 
	X <- fit$x
	X <- cbind(rep(1,nrow(X)),X)
	Y[Y[,1]==-1,1] <- 0
	Y <- Y[sort.it,]
	X <- X[sort.it,,drop=FALSE]
	n.ind <- nrow(Y)
	
	ti <- sort(unique(Y[Y[,3]==1,2]))			#ordered unique times of events
	n.times <- length(ti)	 				#number of unique times of events	
	
	#nelson-Aalen:
	d <- survfit(Surv(Y[,1],Y[,2],Y[,3])~1,type="fleming-harrington")
	cumh <- -log(d$surv[d$n.event!=0])
	haz <- c(cumh[1],diff(cumh))
	
	#change the output for ties in surv. time (sum up the contributions in those intervals)
	d1 <- fit$times
	d2 <- c(d1[-1],d1[length(d1)]+1)
	ic <- which(d1!=d2)
	fc <- apply(fit$coef,2,cumsum)
	fc <- fc[ic,]
	fd <- apply(fc,2,function(x)c(x[1],diff(x)))
	#add lines for times at the end	
	
	if(n.times>nrow(fd)){
		nman <- n.times-nrow(fd)
		fd <- rbind(fd,matrix(0,nrow=nman,ncol=ncol(fd)))		
		fd[(n.times-nman+1):n.times,1] <- haz[(n.times-nman+1):n.times]
	}
	
	bx <- (X%*%t(fd))
	
	if(missing(Gmat)){
		km.inv <- my.survfit(Y[,1],Y[,2],Y[,3])		#now includes time-dependent data
		G <- km.inv$surv.i2[km.inv$n.event != 0]							
		dSt <- 1/G
		Gmat <- matrix(G,byrow=T,ncol=length(G),nrow=n.ind)	#casi v stolpcih, ljudje v vrsticah	
	}
	else if(is.null(dim(Gmat))){
		if(length(Gmat)!=n.times)stop("Wrong length of weights")
		else Gmat <- matrix(Gmat,byrow=T,ncol=length(Gmat),nrow=n.ind)
	}
	else if(any(dim(Gmat)!=c(n.ind,n.times)))stop("Wrong dimension of weights")

	 	
	rg <- meanr <- smin <- newvar1 <- newvar2 <- newvar3 <- enum <- eden <-  rep(NA,n.times)
	for(it in 1:n.times){
			
		k <- sum(Y[Y[,3]==1,2]==ti[it])			#number of ties
		
		inx <- (Y[,2] >= ti[it] & Y[,1]< ti[it])	#at risk
		pi <- bx[inx,it]			#hazard for each indiv.

		rangi <- (rank(-bx[inx,it]) - .5)/Gmat[inx,it]+.5	#ranks for each individual

		r0 <- mean(rangi)
		rP <- (.5+ .5*sum((Y[inx,2] == ti[it] & Y[inx,1]< ti[it])/Gmat[inx,it]))

		rg[it] <- sum(rangi[1:k]/(Gmat[inx,it])[1:k])	#rank of the one who had the event (sum of ranks if ties)
		meanr[it] <-  r0*sum(1/((Gmat[inx,it])[1:k]))	#mean rank at this time 
		smin[it] <- rP*sum(1/((Gmat[inx,it])[1:k]))		

		newvar1[it] <- sum((rangi-r0)^2*pi/(Gmat[inx,it])^2)	#term1 -  for nom
		newvar2[it] <- sum((rP-r0)^2*pi/(Gmat[inx,it])^2)	#term2 -  for denom
		newvar3[it] <- sum((r0-rangi)*(r0-rP)*pi/(Gmat[inx,it])^2)	#term3 - covariance
		enum[it] <- sum((r0-rangi)*pi/(Gmat[inx,it]))		#expected numerator
		eden[it] <- sum((r0-rP)*pi/(Gmat[inx,it]))	#expected denominator
		
	}
	num <- sum(meanr-rg)
	den<- sum(meanr-smin)
	enum <- sum(enum)
	eden <- sum(eden)
	r2 <- num/den	#the Re measure
	newvar <- sqrt(sum(newvar1)/den^2-2*sum(newvar3)*num/den^3+sum(newvar2)*num^2/den^4)
	newvare <- sqrt(sum(newvar1)/eden^2-2*sum(newvar3)*enum/eden^3+sum(newvar2)*enum^2/eden^4)
	out <- list(Re=r2,se=newvare,C=(r2+1)/2,sen0=newvar,type="aareg")
	class(out) <- "re"
	out
}

re.survreg <- function(fit,Gmat){
	#################################################################################################
	# A function that calculates the Re measure of explained variation of the model
	# Arguments: fit: A survreg model object
	#	    
	# 
	# Returned values:
	#  Re: the Re measure
	#      se: standard error of re
	#       C: C index (generalized to allow for time-dependent covariates and weighted to ensure independence of 
	#	   the censoring process)
	#  r2nw,senw: unweighted measure
	#
	#Notes: times should not be negative
	#	
	#################################################################################################
	
	require(survival)
	Y <- fit$y
	Y <- cbind(rep(0,nrow(Y)),Y)
	if(any(Y[,2]<0))warning("Negative follow-up times")
	#nazaj antilogaritmiramo cas:
	if(fit$dist%in%c("weibull","exponential","loglogistic","lognormal"))Y[,2] <- exp(Y[,2])
	
	sort.it <- order(Y[,2],-Y[,3]) 
	Y <- Y[sort.it,]

	if(fit$dist=="weibull"|fit$dist=="exponential")bx <- -(fit$linear.predictors[sort.it] - fit$coef[1])
	bx <- -fit$linear.predictors[sort.it]
	if(fit$dist=="weibull"|fit$dist=="exponential"){
	 gamma <- (1/fit$scale)
	 lambda <- exp(-fit$coef[1])^gamma
	}
	
	n.ind <- nrow(Y)
	ti <- sort(unique(Y[Y[,3]==1,2]))			#ordered unique times of events
	n.times <- length(ti)	 				#number of unique times of events	
	
	r2<-NULL
	
	if(missing(Gmat)){
		km.inv <- my.survfit(Y[,1],Y[,2],Y[,3])		#now includes time-dependent data
		G <- km.inv$surv.i2[km.inv$n.event != 0]							
		dSt <- 1/G
		Gmat <- matrix(G,byrow=T,ncol=length(G),nrow=n.ind)	#casi v stolpcih, ljudje v vrsticah	
	}
	else if(is.null(dim(Gmat))){
		if(length(Gmat)!=n.times)stop("Wrong length of weights")
		else Gmat <- matrix(Gmat,byrow=T,ncol=length(Gmat),nrow=n.ind)
	}
	else if(any(dim(Gmat)!=c(n.ind,n.times)))stop("Wrong dimension of weights")

	 			 
		rg <- meanr <- smin <- newvar1 <- newvar2 <- newvar3 <- eden <- enum  <- rep(NA,n.times)
		for(it in 1:length(ti)){
			k <- sum(Y[Y[,3]==1,2]==ti[it])			#number of ties
			inx <- (Y[,2] >= ti[it] & Y[,1]< ti[it])	#at risk
			if(fit$dist=="weibull"|fit$dist=="exponential"){
				if(it==1) pi <- lambda*exp(bx)[inx]*(ti[it])^gamma						
				else pi <- lambda*exp(bx)[inx]*((ti[it])^gamma-(ti[it-1])^gamma)	
			}
			else if(fit$dist=="gaussian"|fit$dist=="lognormal"){
				 bxi <- -bx[inx]		#dam ga nazaj na pozitivne
				 casi <- ti
				 if(fit$dist=="lognormal") {
		       			 casi <- log(ti)
		       		 }
		       		 S  <- 1-pnorm((casi[it]- bxi)/fit$scale)
				 S0 <- 1
				 if(it!=1) S0 <- 1-pnorm((casi[it-1]- bxi)/fit$scale)
				 pi <- -log(S/S0)
			}			
			else if(fit$dist=="logistic"|fit$dist=="loglogistic"){
		       		 bxi <- -bx[inx]
		       		 casi <- ti
		       		 if(fit$dist=="loglogistic") {
		       		 casi <- log(ti)
		       		 }
		       		 w <- exp((casi[it]- bxi)/fit$scale)
		       		 S <- 1/(1+w)
		       		 S0 <- 1
				 if(it!=1){
					w0 <- exp((casi[it-1]- bxi)/fit$scale)
					S0 <- 1/(1+w0)
				 }
				 pi <- -log(S/S0)
			}			
			rangi <- (rank(-bx[inx]) - .5)/Gmat[inx,it]+.5	#ranks for each individual

			r0 <- mean(rangi)
			rP <- (.5+ .5*sum((Y[inx,2] == ti[it] & Y[inx,1]< ti[it])/Gmat[inx,it]))

			rg[it] <- sum(rangi[1:k]/(Gmat[inx,it])[1:k])	#rank of the one who had the event (sum of ranks if ties)
			meanr[it] <-  r0*sum(1/((Gmat[inx,it])[1:k]))	#mean rank at this time 
			smin[it] <- rP*sum(1/((Gmat[inx,it])[1:k]))		

			newvar1[it] <- sum((rangi-r0)^2*pi/(Gmat[inx,it])^2)	#term1 -  for nom
			newvar2[it] <- sum((rP-r0)^2*pi/(Gmat[inx,it])^2)	#term2 -  for denom
			newvar3[it] <- sum((r0-rangi)*(r0-rP)*pi/(Gmat[inx,it])^2)	#term3 - covariance
			enum[it] <- sum((r0-rangi)*pi/(Gmat[inx,it]))		#expected numerator
			eden[it] <- sum((r0-rP)*pi/(Gmat[inx,it]))	#expected denominator
				
	 	}
		num <- sum(meanr-rg)
		den<- sum(meanr-smin)
		enum <- sum(enum)
		eden <- sum(eden)
		r2 <- num/den	#the Re measure
		newvar <- sqrt(sum(newvar1)/den^2-2*sum(newvar3)*num/den^3+sum(newvar2)*num^2/den^4)
		newvare <- sqrt(sum(newvar1)/eden^2-2*sum(newvar3)*enum/eden^3+sum(newvar2)*enum^2/eden^4)

		out <- list(Re=r2,se=newvare,C=(r2+1)/2,sen0=newvar,times=ti,meanr=meanr,perfr=smin,ranks=rg,weights=Gmat,type="survreg")
		class(out) <- "re"
		out	
}



print.re <- function(x, digits=4, ...){

	  cat("Re measure=",round(x$Re,digits), ",    SE=", round(x$se,digits), "\n \n",sep="")
 	  if(x$type=="coxph"){
		  if(x$nd){
			cat("Number of censored after the last failure time: ", x$nd, "\n")
			if(!is.na(x$Re.imp))cat("Re corrected for the censoring at the end \n (assuming the coeff. and covariate values fixed after the last failure time): ", round(x$Re.imp,digits),"\n \n")
			else cat("Re corrected for the censoring at the end can not be computed - the follow-up \n time after the last event time is split into several intervals \n")
		  }
		  if(!is.na(x$Re.fix)){
			cat("Integrated version of Re (assuming all coeff. and covariate values fixed):", round(x$Re.fix,digits),"\n")	
		  }
 	  }
 	  cat("\n")
 	  cat("Generalized C index=",round(x$C,digits), "\n")
 	  invisible(x)
 }



summary.re <- function(object, times, band=5,...){

	####################################
	# A function for calculating the RE in time 
	# object ... an result of a re(fit) call
	# times ... times at which we wish to calculate Re (unique event times by default
	# band ... a window for calculating the dRe 
	#######################################
	
	num <- cumsum(object$meanr-object$ranks)
	den <- cumsum(object$meanr-object$perfr) 
	
	Rti <- num/den
	
	dn <- length(num) - band 
	
	num1 <- num[-(1:band)]
	den1 <- den[-(1:band)]
	
	dRti <- (num1 - num[1:dn])/(den1 - den[1:dn])
		
	dRti <- c(rep(NA,band-1),Rti[band],dRti)
	
	tab <- data.frame(times=object$times,Rti=Rti,dRti=dRti)
	if(!missing(times)) {
		search.time <- function(time,times)max(which(times < time))
		inx <- unlist(lapply(times,search.time, tab$times)	)
		tab[inx,1] <- times
		tab <- tab[inx,]
	}
	tab
}
























"my.survfit" <- function(start,stop,event){
	#internal function for calculation of weights (allows counting process notation)
	
	if(missing(start)) start <- rep(0,length(stop))		#if no count. process notation, set everyone to start at 0

	data <- data.frame(start=start,stop=stop,event=event)	
	
	data <- data[order(data$start),]			#ordered according to the starting time
	data$Y <- unlist(lapply(data$stop,function(x)sum(data$start<=x)))	#for each time point: the number of indiv. that started before (or at) it
		
	data <- data[order(data$stop,-data$event),]		#order in time, put events before censoring
	
	howmany <- nrow(data)					#total number of rows
	
	lt.t1 <- c(0,data$stop[-nrow(data)])			#aux. vector for searching the stop times	
	lt.t2 <- c(data$stop[-1],data$stop[nrow(data)]+1)
	lt1 <- which(data$stop!=lt.t1)				#first case at a time point
	lt2 <- which(data$stop!=lt.t2)				#last case at a time point
	
	ct.t1 <- c(-1,data$start[-nrow(data)])			#aux. vector for searching the start times
	ct.t2 <- c(data$start[-1],data$start[nrow(data)]+1)
	ct1 <- which(data$start!=ct.t1)				#first case at a time point
	ct2 <- which(data$start!=ct.t2)				#last case at a time point
	n.incom <- rep(0,length(lt1))				#prepare a vector that will contain the no. of incoming indiv. at a time point (length=no. of unique time points)
	inx <- unlist(lapply(data$start[ct1],function(x)min(which(data$stop[lt1]>=x)))) #the index of the first one to fail after a starting point
	incom.sum <- diff(c(0,ct2))				#the number of incoming
	if(length(inx)>1){
	   if(inx[1]==inx[2]){					#if indiv. come in at time 0 and first stop time
		incom.sum <- c(sum(incom.sum[1:2]),incom.sum[-(1:2)])    #sum them up
		inx <- unique(inx)					#only one value per each stop time
	   }
	}
	n.incom[inx] <- incom.sum				#at each event time, we need the number of patients that got in just before it.
	n.incom2 <- n.incom					
	n.incom2[1] <- n.incom2[1] - sum(data$start==0)		#the number of additional patients to come in at time 0 is set to 0
	 
	
		evcum <- cumsum(data$event)			#number of events up to a certain time point
		evcum <- evcum[lt2]				#number of events for t<=t_i
		cencum <- cumsum(data$event==0)			#number of censored
		cencum <- cencum[lt2]				#number of censored t<=t_i
		n.event <- diff(c(0,evcum))			#number of events at each unique event time
		n.cens <- diff(c(-sum(data$start==0),cencum))	#number of censored at each unique event time, the first number is the total of the patients
		n.t.cens <- n.cens - n.incom			#number truly censored: number censored - number of incoming (0 at time 0)
		n.risk <- (data$Y - (0:(howmany-1)))[lt1]-n.incom2 #number at risk at each event time: no.started before or at this time - no. of incoming at this time - 1 per row before this time
		
		kmji <- 1- n.event/n.risk			#proportion of survivors
		
		kmji.i <- 1- n.t.cens/(n.risk-n.event)		#the events are taken out of the risk set before calculating the G(t_i)
		
		km <- (cumprod(kmji))				#km	
		km.i <- cumprod(kmji.i)				#km for censoring			
	
	#correction for the last value (if division by 0) (this happens if the last time is an event time)
	inx <- which(is.na(km.i))				#missing at the end, only needed if not lagged?
	if(length(inx)){
		km.i[inx] <- km.i[min(inx)-1]	# carrie last value forward
	}
	
	km.i2 <- c(1,km.i[-length(km.i)])			#this gives the lagged weight (weight at time just before t)
	
	list(surv=km,surv.i=km.i,n.event=n.event,n.cens=n.t.cens,time=data$stop[lt1],n.risk=n.risk,surv.i2=km.i2)
}