# Load etm package
# Reference :
#   Arthur Allignol (2009). etm: Empirical Transition Matrix. R package version 0.4-6.
> library(etm)


# Load a sample data set.
# type2 = "cens" indicates a censoring state.
> event <- read.table("sampledataEC.dat", header=T)
> event$type2 <- ifelse(event$estatus == 0, "cens", event$type2)
> event[1:6,]
    id   estart    estop estatus type1 type2 
  1  1 0.000000 0.258846       1     0     1 
  2  1 0.258846 1.000000       0     1  cens 
  3  2 0.000000 0.083535       1     0     1 
  4  2 0.083535 0.338997       1     1     2 
  5  2 0.338997 0.364257       1     2     3 
  6  2 0.364257 1.000000       0     3  cens 


# Create a data frame for etm function
> trdata <- event[,c("id","type1","type2","estop")]
> dimnames(trdata)[[2]] <- c("id","from","to","time")
> trdata$from <- as.character(trdata$from)
> trdata$to   <- as.character(trdata$to)
> trdata[1:6,]
    id from   to     time
  1  1    0    1 0.258846
  2  1    1 cens 1.000000
  3  2    0    1 0.083535
  4  2    1    2 0.338997
  5  2    2    3 0.364257
  6  2    3 cens 1.000000


# Define the transition matrix
> tra <- matrix(ncol=9, nrow=9, FALSE)
> tra[1,2] <- TRUE
> tra[2,3] <- TRUE
> tra[3,4] <- TRUE
> tra[4,5] <- TRUE
> tra[5,6] <- TRUE
> tra[6,7] <- TRUE
> tra[7,8] <- TRUE
> tra[8,9] <- TRUE
> tra
       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9]
 [1,] FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [2,] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE FALSE
 [3,] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
 [4,] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
 [5,] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
 [6,] FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
 [7,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE
 [8,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
 [9,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE


# data = trdata is the input data frame
# state.names = 0, 1, 2, ..., 8 are the possible transition states
# tra = tra is the transition matrix
# cens.name = "cens" indicates a censoring time
# s = 0 is the starting value for computing the transition probabilities
> tr <- etm(trdata, c("0","1","2","3","4","5","6","7","8"), tra, "cens", 0)


# List of output available from the tr object
> attributes(tr)
  $names
   [1] "est"          "cov"        "time"    "s"        "t"         "trans"  
   [7] "state.names"  "cens.name"  "n.risk"  "n.event"  "delta.na"   

  $class
   [1] "etm"


# To list some event times at which the transition probabilities are computed
> tr$time[c(1:5,length(tr$time))]
  [1] 0.001238 0.003151 0.006937 0.008268 0.009119 1.000000


# Obtaining Aalen-Johansen estimate of transition probability matrix at t = 1
# Reference:
#   Aalen 0. and Johansen S. (1978).  An empirical transition matrix for non-homogeneous
#   Markov chains based on censored observations.  Scandinavian Journal of Statistics,
#   5: 141-150
> est <- tr$est[,,length(tr$time)]
> est
            0         1         2         3          4          5          6           7 8
  0 0.1639261 0.2969284 0.2470731 0.1319243 0.09236413 0.04047041 0.02194808 0.005365594 0
  1 0.0000000 0.1791447 0.2908358 0.1983015 0.17016376 0.08938665 0.05677938 0.015388232 0
  2 0.0000000 0.0000000 0.1035794 0.1683914 0.25143687 0.22286741 0.19229340 0.061431524 0
  3 0.0000000 0.0000000 0.0000000 0.1079637 0.24448346 0.28555386 0.27148810 0.090510835 0
  4 0.0000000 0.0000000 0.0000000 0.0000000 0.13808193 0.34325842 0.38244565 0.136213992 0
  5 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.35555556 0.46444444 0.180000000 0
  6 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.50000000 0.500000000 0
  7 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 1.000000000 0
  8 0.0000000 0.0000000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000 1


# To get the estimated covariance matrix at t = 1
# Reference :
#   Andersen P.K., Borgan O., Gill R.D. and Keiding N. (1993).  Statistical models based on
#   counting processes.  Springer Series in Statistics.  New York, NY: Springer
> est.cov <- tr$cov[,,length(tr$time)]
> est.cov[1:9,1:12]
               0 0 1 0 2 0 3 0 4 0 5 0 6 0 7 0 8 0           0 1 1 1 2 1 ...
  0 0 0.0008337584   0   0   0   0   0   0   0   0 -0.0003337303   0   0
  1 0 0.0000000000   0   0   0   0   0   0   0   0  0.0000000000   0   0
  2 0 0.0000000000   0   0   0   0   0   0   0   0  0.0000000000   0   0
  3 0 0.0000000000   0   0   0   0   0   0   0   0  0.0000000000   0   0
  4 0 0.0000000000   0   0   0   0   0   0   0   0  0.0000000000   0   0
  5 0 0.0000000000   0   0   0   0   0   0   0   0  0.0000000000   0   0
  6 0 0.0000000000   0   0   0   0   0   0   0   0  0.0000000000   0   0
  7 0 0.0000000000   0   0   0   0   0   0   0   0  0.0000000000   0   0
  8 0 0.0000000000   0   0   0   0   0   0   0   0  0.0000000000   0   0
  :

# To get P(0,1) from state 1 to 2 only
> trprob(tr, "1 2", 1)
  [1] 0.2908358


# To get the variance of P(0,1) from state 1 to 2 only
> trcov(tr, "1 2", 1)
  [1] 0.001813091


# To get all state occupancy probabilities in matrix form, P(0,t)
> u <- tr$time
>
> probu <- NULL
> for (k in 1:length(u)) {
>   tmp   <- tr$est[,,k]
>   probu <- rbind(probu, tmp[1,])
> } 
>
> probu <- data.frame(probu)
> dimnames(probu)[[2]] <- c("P00","P01","P02","P03","P04","P05","P06","P07","P08")

> probu[c(1:5,(nrow(probu)-5):nrow(probu)),]
            P00       P01       P02       P03        P04        P05        P06         P07 P08
  1   0.9950000 0.0050000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000   0
  2   0.9900000 0.0100000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000   0
  3   0.9850000 0.0150000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000   0
  4   0.9800000 0.0200000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000   0
  5   0.9750000 0.0250000 0.0000000 0.0000000 0.00000000 0.00000000 0.00000000 0.000000000   0
  :
  380 0.1639261 0.3038337 0.2466588 0.1386309 0.07916644 0.04047041 0.02194808 0.005365594   0
  381 0.1639261 0.3038337 0.2466588 0.1320295 0.08576791 0.04047041 0.02194808 0.005365594   0
  382 0.1639261 0.3038337 0.2401677 0.1385205 0.08576791 0.04047041 0.02194808 0.005365594   0
  383 0.1639261 0.2969284 0.2470731 0.1385205 0.08576791 0.04047041 0.02194808 0.005365594   0
  384 0.1639261 0.2969284 0.2470731 0.1319243 0.09236413 0.04047041 0.02194808 0.005365594   0
  385 0.1639261 0.2969284 0.2470731 0.1319243 0.09236413 0.04047041 0.02194808 0.005365594   0


# To compute the cumulative mean function
> mu <- rep(0, nrow(probu))
> for (k in 1:8) {
>   mu <- mu + k*probu[,k+1]
> }

> mu[c(1:5,(length(mu)-4):length(mu))]
  [1] 0.005000 0.010000 0.015000 0.020000 0.025000 
  [6] 1.907911 1.914402 1.921307 1.927904 1.927904


# Plot of the cumulative mean functions - Aalen-Johansen estimate and true value
postscript("etm.cmf.eventEC.eps", onefile=F, print.it=F)

plot(u, mu, type="s", xlim=c(0,1), ylim=c(0,2), 
     xlab="DAYS  SINCE  STUDY  ENTRY", ylab="CUMULATIVE  MEAN  FUNCTION")
lines(u, 2*u, lty=2)
legend(0, 2, c("AALEN-JOHANSEN  ESTIMATE","TRUE  MEAN  FUNCTION"), lty=c(1,2), bty="n")

dev.off()


# Plot of the state occupancy probabilities for state 1, 2, 3, and 4
postscript("etm.state.prob.eventEC.eps", onefile=F, print.it=F)
par(mfrow=c(2,2), mai=c(0.75,0.75,0.5,0.5))

plot(u, probu$P01, type="s", xlim=c(0,1), ylim=c(0,0.4),
     xlab="DAYS  SINCE  STUDY  ENTRY", ylab="STATE  OCCUPANCY  PROBABILITY")
mtext("STATE 1", side=3, line=0.5)

plot(u, probu$P02, type="s", xlim=c(0,1), ylim=c(0,0.4),
     xlab="DAYS  SINCE  STUDY  ENTRY", ylab="STATE  OCCUPANCY  PROBABILITY")
mtext("STATE 2", side=3, line=0.5)

plot(u, probu$P03, type="s", xlim=c(0,1), ylim=c(0,0.4),
     xlab="DAYS  SINCE  STUDY  ENTRY", ylab="STATE  OCCUPANCY  PROBABILITY")
mtext("STATE 3", side=3, line=0.5)

plot(u, probu$P04, type="s", xlim=c(0,1), ylim=c(0,0.4),
     xlab="DAYS  SINCE  STUDY  ENTRY", ylab="STATE  OCCUPANCY  PROBABILITY")
mtext("STATE 4", side=3, line=0.5)

dev.off()